use crate::domain::MemoryRecord;
/// Suggested additions for a record's descriptive metadata.
///
/// Each field holds the values proposed for the record field of the same
/// name; an empty vector means nothing is proposed for that field.
#[derive(Debug, Clone, Default)]
pub struct EnrichmentPatch {
    /// Proposed entity names.
    pub entities: Vec<String>,
    /// Proposed tags.
    pub tags: Vec<String>,
    /// Proposed trigger words.
    pub triggers: Vec<String>,
}

impl EnrichmentPatch {
    /// Returns `true` when the patch proposes nothing at all, i.e. every
    /// one of its three lists is empty.
    pub fn is_empty(&self) -> bool {
        [&self.entities, &self.tags, &self.triggers]
            .iter()
            .all(|list| list.is_empty())
    }
}
/// Canonical spellings of the languages, frameworks, tools and acronyms that
/// `extract_entities` looks for.
///
/// The text is compared case-insensitively, and a hit is reported using the
/// spelling listed here. Hits are collected in table order ahead of any other
/// entity candidate, and the resulting list is capped, so entries nearer the
/// top take precedence when many of them match.
const KNOWN_TOOLS: &[&str] = &[
"Rust",
"TypeScript",
"JavaScript",
"Python",
"Go",
"Java",
"Swift",
"React",
"Vue",
"Svelte",
"Angular",
"Next.js",
"Nuxt",
"SQLite",
"PostgreSQL",
"MySQL",
"Redis",
"MongoDB",
"Docker",
"Kubernetes",
"Terraform",
"AWS",
"GCP",
"Azure",
"Git",
"GitHub",
"GitLab",
"Obsidian",
"Tauri",
"Electron",
"tantivy",
"tokio",
"serde",
"clap",
"anyhow",
"npm",
"cargo",
"pip",
"brew",
"MCP",
"JSON-RPC",
"REST",
"GraphQL",
"gRPC",
"OAuth",
"JWT",
"CSRF",
"XSS",
"CORS",
"CI",
"CD",
"TDD",
"BDD",
];
/// Lowercase filler words that are never reported as a capitalised-word
/// entity or as a trigger.
///
/// Callers lowercase a candidate word before looking it up here, so every
/// entry must stay lowercase.
const STOP_WORDS: &[&str] = &[
"the", "a", "an", "is", "are", "was", "were", "be", "been", "to", "of", "in", "for", "on",
"with", "at", "by", "from", "and", "or", "not", "no", "but", "if", "then", "else", "this",
"that", "it", "its", "my", "your", "our", "do", "does", "did", "will", "would", "should",
"could", "have", "has", "had", "can", "may", "might", "use", "using", "used",
];
/// Keyword-to-tag rules consulted by `extract_tags`.
///
/// Each entry is `(keywords, tag)`: the tag is emitted when any one of the
/// keywords is found in the already-lowercased text, so keywords must be
/// written in lowercase. Rules are evaluated in the order listed and the
/// resulting tag list is capped, so earlier rules win when many rules match.
const TAG_RULES: &[(&[&str], &str)] = &[
(
&[
"database", "db", "sql", "postgres", "sqlite", "mysql", "redis", "mongo",
],
"database",
),
(
&["test", "spec", "assert", "mock", "tdd", "coverage"],
"testing",
),
(
&["auth", "oauth", "jwt", "login", "session", "permission"],
"auth",
),
(
&["api", "endpoint", "route", "rest", "graphql", "grpc"],
"api",
),
(
&["deploy", "ci", "cd", "pipeline", "docker", "kubernetes"],
"devops",
),
(
&["security", "csrf", "xss", "cors", "vulnerability", "secret"],
"security",
),
(
&["performance", "cache", "optimize", "latency", "throughput"],
"performance",
),
(
&["ui", "frontend", "component", "layout", "style", "css"],
"frontend",
),
(
&["config", "setting", "environment", "env", "toml", "yaml"],
"config",
),
(
&["error", "exception", "panic", "crash", "bug", "fix"],
"error-handling",
),
(
&["refactor", "cleanup", "rename", "restructure", "simplify"],
"refactoring",
),
(
&["document", "readme", "comment", "doc", "guide"],
"documentation",
),
];
pub fn enrich_record(record: &MemoryRecord) -> EnrichmentPatch {
let text = format!("{} {}", record.title, record.summary);
let text_lower = text.to_lowercase();
let entities = if record.entities.is_empty() {
extract_entities(&text)
} else {
Vec::new()
};
let tags = if record.tags.is_empty() {
extract_tags(&text_lower, &record.memory_type)
} else {
Vec::new()
};
let triggers = if record.triggers.is_empty() {
extract_triggers(&record.title)
} else {
Vec::new()
};
EnrichmentPatch {
entities,
tags,
triggers,
}
}
/// Extracts at most five entity names from `text`.
///
/// Candidates are gathered in two passes, and the first five win:
///
/// 1. Every `KNOWN_TOOLS` entry that appears in `text` as a whole token
///    (case-insensitive), reported in the table's spelling and table order.
/// 2. Whitespace-separated words that, after stripping leading and trailing
///    non-alphanumeric characters, are at least two bytes long, start with
///    an uppercase letter, contain at least one letter that is not
///    uppercase (so pure acronyms are skipped), and are not stop words.
///
/// Candidates are de-duplicated case-insensitively across both passes.
///
/// Known tools must match whole tokens rather than arbitrary substrings;
/// otherwise "good" or "cargo" would report `Go`, "decision" would report
/// `CI`, and "JavaScript" would additionally report `Java`, crowding real
/// entities out of the five-item limit.
fn extract_entities(text: &str) -> Vec<String> {
    const MAX_ENTITIES: usize = 5;

    let mut entities: Vec<String> = Vec::new();
    let mut seen = std::collections::HashSet::new();
    let text_lower = text.to_lowercase();

    // Pass 1: curated names, matched on token boundaries.
    for tool in KNOWN_TOOLS {
        let tool_lower = tool.to_lowercase();
        if contains_whole_word(&text_lower, &tool_lower) && seen.insert(tool_lower) {
            entities.push((*tool).to_string());
        }
    }

    // Pass 2: capitalised words that look like proper nouns.
    for word in text.split_whitespace() {
        let cleaned = word.trim_matches(|c: char| !c.is_alphanumeric());
        let starts_upper = cleaned.chars().next().map_or(false, char::is_uppercase);
        let has_non_upper_letter = cleaned
            .chars()
            .any(|c| c.is_alphabetic() && !c.is_uppercase());
        if cleaned.len() < 2 || !starts_upper || !has_non_upper_letter {
            continue;
        }

        let lower = cleaned.to_lowercase();
        if !STOP_WORDS.contains(&lower.as_str()) && seen.insert(lower) {
            entities.push(cleaned.to_string());
        }
    }

    entities.truncate(MAX_ENTITIES);
    entities
}

/// Returns `true` when `needle` occurs in `haystack` with no alphanumeric
/// character directly before or after the occurrence (string edges count as
/// boundaries). Both arguments are compared as given; callers normalise case.
fn contains_whole_word(haystack: &str, needle: &str) -> bool {
    let is_boundary = |neighbor: Option<char>| neighbor.map_or(true, |c| !c.is_alphanumeric());
    haystack.match_indices(needle).any(|(start, hit)| {
        let end = start + hit.len();
        is_boundary(haystack[..start].chars().next_back())
            && is_boundary(haystack[end..].chars().next())
    })
}
/// Derives at most four tags from already-lowercased text and a memory type.
///
/// If `memory_type` is one of `constraint`, `decision`, `incident`,
/// `workflow` or `pattern`, it becomes the first tag. After that, each
/// `TAG_RULES` entry contributes its tag when one of its keywords begins a
/// word in `text_lower`; rules are applied in table order.
///
/// Keywords are anchored to the start of a word so that stems still match
/// ("test" matches "testing", "deploy" matches "deployment") while
/// accidental mid-word hits do not ("ui" inside "build", "ci" inside
/// "decision", "test" inside "latest").
fn extract_tags(text_lower: &str, memory_type: &str) -> Vec<String> {
    const MAX_TAGS: usize = 4;

    let mut tags: Vec<String> = Vec::new();

    if matches!(
        memory_type,
        "constraint" | "decision" | "incident" | "workflow" | "pattern"
    ) {
        tags.push(memory_type.to_string());
    }

    for (keywords, tag) in TAG_RULES {
        let matched = keywords
            .iter()
            .any(|kw| contains_word_prefix(text_lower, kw));
        // Compare by &str so no String is allocated just for the lookup.
        if matched && !tags.iter().any(|existing| existing.as_str() == *tag) {
            tags.push((*tag).to_string());
        }
    }

    tags.truncate(MAX_TAGS);
    tags
}

/// Returns `true` when `keyword` occurs in `text` at the start of a word,
/// i.e. at the beginning of `text` or directly after a non-alphanumeric
/// character. What follows the keyword is not constrained.
fn contains_word_prefix(text: &str, keyword: &str) -> bool {
    text.match_indices(keyword).any(|(start, _)| {
        text[..start]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric())
    })
}
/// Picks up to three trigger words from `title`, lowercased, in title order.
///
/// Each whitespace-separated word has leading and trailing characters
/// removed unless they are alphanumeric, `-` or `_`. Words shorter than two
/// bytes after trimming, and words listed in `STOP_WORDS`, are skipped.
fn extract_triggers(title: &str) -> Vec<String> {
    const MAX_TRIGGERS: usize = 3;

    /// Characters stripped from both ends of a word.
    fn is_edge_noise(c: char) -> bool {
        !(c.is_alphanumeric() || c == '-' || c == '_')
    }

    title
        .split_whitespace()
        .filter_map(|raw| {
            let word = raw.trim_matches(is_edge_noise);
            // Length is checked on the trimmed word before lowercasing.
            if word.len() < 2 {
                return None;
            }
            let lowered = word.to_lowercase();
            if STOP_WORDS.contains(&lowered.as_str()) {
                None
            } else {
                Some(lowered)
            }
        })
        .take(MAX_TRIGGERS)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{
        MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope, MemorySourceKind,
    };

    /// Builds a user-scoped, accepted, manually-sourced record with the given
    /// title, summary and memory type, and every other field empty or `None`.
    fn make_record(title: &str, summary: &str, memory_type: &str) -> MemoryRecord {
        let origin = MemoryOrigin {
            source_kind: MemorySourceKind::Manual,
            source_ref: String::from("test"),
        };
        MemoryRecord {
            title: title.to_owned(),
            summary: summary.to_owned(),
            memory_type: memory_type.to_owned(),
            scope: MemoryScope::User,
            state: MemoryLifecycleState::Accepted,
            origin,
            project_id: None,
            user_id: None,
            sensitivity: None,
            entities: Vec::new(),
            tags: Vec::new(),
            triggers: Vec::new(),
            related_files: Vec::new(),
            related_records: Vec::new(),
            supersedes: None,
            applies_to: Vec::new(),
            valid_until: None,
        }
    }

    #[test]
    fn enrich_should_extract_known_tools_as_entities() {
        let patch = enrich_record(&make_record(
            "Use PostgreSQL for persistence",
            "Store lifecycle data in PostgreSQL with Docker for local dev",
            "decision",
        ));

        assert!(patch.entities.contains(&"PostgreSQL".to_string()));
        assert!(patch.entities.contains(&"Docker".to_string()));
    }

    #[test]
    fn enrich_should_extract_tags_from_keywords() {
        let patch = enrich_record(&make_record(
            "Database migration strategy",
            "Always use reversible SQL migrations",
            "decision",
        ));

        assert!(patch.tags.iter().any(|tag| tag == "decision"));
        assert!(patch.tags.iter().any(|tag| tag == "database"));
    }

    #[test]
    fn enrich_should_extract_triggers_from_title() {
        let patch = enrich_record(&make_record(
            "Prefer immutable data structures",
            "Use immutable patterns to avoid side effects",
            "constraint",
        ));

        assert!(!patch.triggers.is_empty());
        assert!(patch.triggers.iter().any(|trigger| trigger == "prefer"));
        assert!(patch.triggers.iter().any(|trigger| trigger == "immutable"));
    }

    #[test]
    fn enrich_should_skip_fields_that_already_have_values() {
        // Entities and tags are pre-populated; triggers are left empty.
        let record = {
            let mut prepared = make_record(
                "Use Rust for CLI",
                "Rust provides good performance",
                "decision",
            );
            prepared.entities = vec!["Rust".to_string()];
            prepared.tags = vec!["language".to_string()];
            prepared
        };

        let patch = enrich_record(&record);

        assert!(
            patch.entities.is_empty(),
            "should skip entities since record already has them"
        );
        assert!(
            patch.tags.is_empty(),
            "should skip tags since record already has them"
        );
        assert!(!patch.triggers.is_empty(), "should still extract triggers");
    }

    #[test]
    fn enrich_empty_patch_should_report_is_empty() {
        // With all three fields already populated nothing is left to suggest.
        let record = {
            let mut prepared = make_record("x", "y", "preference");
            prepared.entities = vec!["a".to_string()];
            prepared.tags = vec!["b".to_string()];
            prepared.triggers = vec!["c".to_string()];
            prepared
        };

        assert!(enrich_record(&record).is_empty());
    }
}