spool-memory 0.2.3

//! Heuristic enrichment for lifecycle records.
//!
//! Extracts structured fields (entities, tags, triggers) from a record's
//! title and summary using rule-based heuristics. Used by `sync-vault --enrich`
//! to backfill older records that lack these fields.

use crate::domain::MemoryRecord;

/// Fields proposed by the enrichment heuristics for a single record.
///
/// Every vector may be empty; an empty vector means "nothing to add" for
/// that field.
#[derive(Debug, Clone, Default)]
pub struct EnrichmentPatch {
    /// Entity names suggested for the record.
    pub entities: Vec<String>,
    /// Tag names suggested for the record.
    pub tags: Vec<String>,
    /// Trigger words suggested for the record.
    pub triggers: Vec<String>,
}

impl EnrichmentPatch {
    /// Returns `true` when none of the three fields carries a suggestion.
    pub fn is_empty(&self) -> bool {
        [&self.entities, &self.tags, &self.triggers]
            .iter()
            .all(|field| field.is_empty())
    }
}

/// Known tool/library/technology names for entity extraction.
///
/// Names are compared case-insensitively against the record text; the
/// spelling listed here is the canonical casing that is emitted as the
/// entity. Entities are collected in list order and the result is capped,
/// so entries nearer the top win when a text mentions many tools.
const KNOWN_TOOLS: &[&str] = &[
    // Programming languages
    "Rust",
    "TypeScript",
    "JavaScript",
    "Python",
    "Go",
    "Java",
    "Swift",
    // Frontend frameworks
    "React",
    "Vue",
    "Svelte",
    "Angular",
    "Next.js",
    "Nuxt",
    // Databases and data stores
    "SQLite",
    "PostgreSQL",
    "MySQL",
    "Redis",
    "MongoDB",
    // Infrastructure and cloud
    "Docker",
    "Kubernetes",
    "Terraform",
    "AWS",
    "GCP",
    "Azure",
    // Version control and hosting
    "Git",
    "GitHub",
    "GitLab",
    // Applications and desktop frameworks
    "Obsidian",
    "Tauri",
    "Electron",
    // Rust crates
    "tantivy",
    "tokio",
    "serde",
    "clap",
    "anyhow",
    // Package managers and build tools
    "npm",
    "cargo",
    "pip",
    "brew",
    // Protocols and API styles
    "MCP",
    "JSON-RPC",
    "REST",
    "GraphQL",
    "gRPC",
    // Auth and web security
    "OAuth",
    "JWT",
    "CSRF",
    "XSS",
    "CORS",
    // Engineering practices
    "CI",
    "CD",
    "TDD",
    "BDD",
];

/// Stop words to skip during extraction.
///
/// Used both when picking trigger words from a title and when deciding
/// whether a capitalized word qualifies as an entity. All entries are
/// lowercase; callers lowercase a candidate word before looking it up here.
const STOP_WORDS: &[&str] = &[
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "to", "of", "in", "for", "on",
    "with", "at", "by", "from", "and", "or", "not", "no", "but", "if", "then", "else", "this",
    "that", "it", "its", "my", "your", "our", "do", "does", "did", "will", "would", "should",
    "could", "have", "has", "had", "can", "may", "might", "use", "using", "used",
];

/// Tag inference rules: keyword patterns mapped to tag names.
///
/// Each entry is `(keywords, tag)`: when any of the keywords is found in the
/// lowercased record text, `tag` is added to the record's suggested tags.
/// Keywords must therefore be written in lowercase.
///
/// Rules are evaluated top to bottom and the resulting tag list is capped,
/// so rules listed earlier take priority when many of them match.
const TAG_RULES: &[(&[&str], &str)] = &[
    (
        &[
            "database", "db", "sql", "postgres", "sqlite", "mysql", "redis", "mongo",
        ],
        "database",
    ),
    (
        &["test", "spec", "assert", "mock", "tdd", "coverage"],
        "testing",
    ),
    (
        &["auth", "oauth", "jwt", "login", "session", "permission"],
        "auth",
    ),
    (
        &["api", "endpoint", "route", "rest", "graphql", "grpc"],
        "api",
    ),
    (
        &["deploy", "ci", "cd", "pipeline", "docker", "kubernetes"],
        "devops",
    ),
    (
        &["security", "csrf", "xss", "cors", "vulnerability", "secret"],
        "security",
    ),
    (
        &["performance", "cache", "optimize", "latency", "throughput"],
        "performance",
    ),
    (
        &["ui", "frontend", "component", "layout", "style", "css"],
        "frontend",
    ),
    (
        &["config", "setting", "environment", "env", "toml", "yaml"],
        "config",
    ),
    (
        &["error", "exception", "panic", "crash", "bug", "fix"],
        "error-handling",
    ),
    (
        &["refactor", "cleanup", "rename", "restructure", "simplify"],
        "refactoring",
    ),
    (
        &["document", "readme", "comment", "doc", "guide"],
        "documentation",
    ),
];

/// Analyze a record and produce an enrichment patch with entities, tags, and triggers.
/// Only returns fields that the record currently lacks (empty vectors).
pub fn enrich_record(record: &MemoryRecord) -> EnrichmentPatch {
    let text = format!("{} {}", record.title, record.summary);
    let text_lower = text.to_lowercase();

    let entities = if record.entities.is_empty() {
        extract_entities(&text)
    } else {
        Vec::new()
    };

    let tags = if record.tags.is_empty() {
        extract_tags(&text_lower, &record.memory_type)
    } else {
        Vec::new()
    };

    let triggers = if record.triggers.is_empty() {
        extract_triggers(&record.title)
    } else {
        Vec::new()
    };

    EnrichmentPatch {
        entities,
        tags,
        triggers,
    }
}

/// Extract up to five entities from `text`.
///
/// Two passes feed the result, in this order:
///
/// 1. Every entry of `KNOWN_TOOLS` that occurs in the text as a whole term
///    (case-insensitive, not embedded in a longer alphanumeric word). The
///    canonical casing from `KNOWN_TOOLS` is emitted.
/// 2. Capitalized words that look like proper nouns: at least two bytes long,
///    starting with an uppercase letter, containing at least one
///    non-uppercase letter (so bare acronyms are skipped), and not a stop
///    word. The word is emitted as written, minus surrounding punctuation.
///
/// Entities are de-duplicated case-insensitively across both passes.
fn extract_entities(text: &str) -> Vec<String> {
    const MAX_ENTITIES: usize = 5;

    let mut entities = Vec::new();
    let mut seen = std::collections::HashSet::new();

    // Pass 1: known tools. Whole-term matching keeps short names such as
    // "Go", "CI" or "REST" from firing inside "good", "decision" or "restore",
    // and keeps "Java" from firing on every "JavaScript".
    let text_lower = text.to_lowercase();
    for tool in KNOWN_TOOLS {
        let tool_lower = tool.to_lowercase();
        if contains_whole_term(&text_lower, &tool_lower) && seen.insert(tool_lower) {
            entities.push(tool.to_string());
        }
    }

    // Pass 2: capitalized words that look like proper nouns.
    for word in text.split_whitespace() {
        let cleaned = word.trim_matches(|c: char| !c.is_alphanumeric());
        let looks_like_proper_noun = cleaned.len() >= 2
            && matches!(cleaned.chars().next(), Some(c) if c.is_uppercase())
            && cleaned
                .chars()
                .any(|c| c.is_alphabetic() && !c.is_uppercase());
        if !looks_like_proper_noun {
            continue;
        }

        let lower = cleaned.to_lowercase();
        if STOP_WORDS.contains(&lower.as_str()) {
            continue;
        }
        // `seen` already holds the known tools, so they are not added twice.
        if seen.insert(lower) {
            entities.push(cleaned.to_string());
        }
    }

    entities.truncate(MAX_ENTITIES);
    entities
}

/// Returns `true` when `term` occurs in `haystack` with no alphanumeric
/// character directly before or after it.
///
/// Both arguments are compared as given; callers lowercase them first for a
/// case-insensitive check. An empty `term` never matches.
fn contains_whole_term(haystack: &str, term: &str) -> bool {
    if term.is_empty() {
        return false;
    }
    haystack.match_indices(term).any(|(start, matched)| {
        let end = start + matched.len();
        let joined_before =
            matches!(haystack[..start].chars().next_back(), Some(c) if c.is_alphanumeric());
        let joined_after =
            matches!(haystack[end..].chars().next(), Some(c) if c.is_alphanumeric());
        !joined_before && !joined_after
    })
}

/// Extract up to four tags from a record's lowercased text and memory type.
///
/// The memory type contributes a base tag of the same name when it is one of
/// `constraint`, `decision`, `incident`, `workflow` or `pattern`. After that,
/// each rule in `TAG_RULES` adds its tag when one of its keywords begins a
/// word in `text_lower`. Anchoring at the start of a word keeps inflected
/// forms matching ("test" still matches "tests" and "testing", "postgres"
/// still matches "postgresql") while preventing short keywords from firing in
/// the middle of unrelated words ("ci" in "decision", "ui" in "build").
///
/// `text_lower` must already be lowercased; keywords are compared verbatim.
fn extract_tags(text_lower: &str, memory_type: &str) -> Vec<String> {
    const MAX_TAGS: usize = 4;
    const TYPE_TAGS: [&str; 5] = ["constraint", "decision", "incident", "workflow", "pattern"];

    let mut tags: Vec<String> = Vec::new();

    // Memory type → base tag
    if TYPE_TAGS.contains(&memory_type) {
        tags.push(memory_type.to_string());
    }

    // Keyword-based tag inference, in rule order.
    for &(keywords, tag) in TAG_RULES {
        let matched = keywords
            .iter()
            .any(|kw| contains_word_prefix(text_lower, kw));
        if matched && !tags.iter().any(|existing| existing == tag) {
            tags.push(tag.to_string());
        }
    }

    tags.truncate(MAX_TAGS);
    tags
}

/// Returns `true` when `keyword` occurs in `text` at the start of a word,
/// i.e. at the beginning of `text` or right after a non-alphanumeric
/// character. An empty `keyword` never matches.
fn contains_word_prefix(text: &str, keyword: &str) -> bool {
    if keyword.is_empty() {
        return false;
    }
    text.match_indices(keyword).any(|(start, _)| {
        !matches!(text[..start].chars().next_back(), Some(c) if c.is_alphanumeric())
    })
}

/// Extract triggers: up to three distinct significant words from the title.
///
/// Words are taken in title order. Surrounding punctuation is stripped, but
/// `-` and `_` are kept so identifiers such as `--enrich` or `__init__`
/// survive intact. A word is significant when it is at least two bytes long,
/// contains at least one alphanumeric character (so a bare `--` separator is
/// not a trigger), and is not a stop word. Triggers are returned lowercased,
/// and a word that repeats in the title is only reported once.
fn extract_triggers(title: &str) -> Vec<String> {
    const MAX_TRIGGERS: usize = 3;

    let mut triggers: Vec<String> = Vec::with_capacity(MAX_TRIGGERS);
    for raw in title.split_whitespace() {
        let word = raw.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
        // Reject too-short tokens and tokens made only of the kept punctuation.
        if word.len() < 2 || !word.chars().any(|c| c.is_alphanumeric()) {
            continue;
        }

        let lowered = word.to_lowercase();
        if STOP_WORDS.contains(&lowered.as_str()) || triggers.contains(&lowered) {
            continue;
        }

        triggers.push(lowered);
        if triggers.len() == MAX_TRIGGERS {
            break;
        }
    }
    triggers
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{
        MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope, MemorySourceKind,
    };

    /// Build an accepted, user-scoped, manually created record whose
    /// entities, tags and triggers are all empty.
    fn make_record(title: &str, summary: &str, memory_type: &str) -> MemoryRecord {
        let origin = MemoryOrigin {
            source_kind: MemorySourceKind::Manual,
            source_ref: String::from("test"),
        };

        MemoryRecord {
            title: title.to_owned(),
            summary: summary.to_owned(),
            memory_type: memory_type.to_owned(),
            scope: MemoryScope::User,
            state: MemoryLifecycleState::Accepted,
            origin,
            project_id: None,
            user_id: None,
            sensitivity: None,
            entities: vec![],
            tags: vec![],
            triggers: vec![],
            related_files: vec![],
            related_records: vec![],
            supersedes: None,
            applies_to: vec![],
            valid_until: None,
        }
    }

    /// Whether `items` holds an element equal to `wanted`.
    fn has(items: &[String], wanted: &str) -> bool {
        items.iter().any(|item| item == wanted)
    }

    #[test]
    fn enrich_should_extract_known_tools_as_entities() {
        let patch = enrich_record(&make_record(
            "Use PostgreSQL for persistence",
            "Store lifecycle data in PostgreSQL with Docker for local dev",
            "decision",
        ));

        assert!(has(&patch.entities, "PostgreSQL"));
        assert!(has(&patch.entities, "Docker"));
    }

    #[test]
    fn enrich_should_extract_tags_from_keywords() {
        let patch = enrich_record(&make_record(
            "Database migration strategy",
            "Always use reversible SQL migrations",
            "decision",
        ));

        assert!(has(&patch.tags, "decision"));
        assert!(has(&patch.tags, "database"));
    }

    #[test]
    fn enrich_should_extract_triggers_from_title() {
        let patch = enrich_record(&make_record(
            "Prefer immutable data structures",
            "Use immutable patterns to avoid side effects",
            "constraint",
        ));

        assert!(!patch.triggers.is_empty());
        assert!(has(&patch.triggers, "prefer"));
        assert!(has(&patch.triggers, "immutable"));
    }

    #[test]
    fn enrich_should_skip_fields_that_already_have_values() {
        // Entities and tags are pre-filled; triggers stay empty.
        let record = MemoryRecord {
            entities: vec![String::from("Rust")],
            tags: vec![String::from("language")],
            ..make_record(
                "Use Rust for CLI",
                "Rust provides good performance",
                "decision",
            )
        };

        let patch = enrich_record(&record);

        assert!(
            patch.entities.is_empty(),
            "should skip entities since record already has them"
        );
        assert!(
            patch.tags.is_empty(),
            "should skip tags since record already has them"
        );
        assert!(!patch.triggers.is_empty(), "should still extract triggers");
    }

    #[test]
    fn enrich_empty_patch_should_report_is_empty() {
        // All three fields are pre-filled, so nothing is left to enrich.
        let record = MemoryRecord {
            entities: vec![String::from("a")],
            tags: vec![String::from("b")],
            triggers: vec![String::from("c")],
            ..make_record("x", "y", "preference")
        };

        assert!(enrich_record(&record).is_empty());
    }
}