1use crate::domain::MemoryRecord;
8
/// Metadata suggested for a record by the heuristic enrichment pass.
///
/// Each list holds only *proposed* values; a list is left empty when
/// enrichment had nothing to suggest for that field.
#[derive(Debug, Clone, Default)]
pub struct EnrichmentPatch {
    /// Suggested entity names (tools, technologies, proper nouns).
    pub entities: Vec<String>,
    /// Suggested topical tags.
    pub tags: Vec<String>,
    /// Suggested trigger words.
    pub triggers: Vec<String>,
}

impl EnrichmentPatch {
    /// Returns `true` when the patch carries no suggestions at all,
    /// i.e. every one of its lists is empty.
    pub fn is_empty(&self) -> bool {
        [&self.entities, &self.tags, &self.triggers]
            .iter()
            .all(|suggestions| suggestions.is_empty())
    }
}
22
/// Names of languages, frameworks, services, and common acronyms that
/// `extract_entities` looks for (case-insensitively) in record text.
///
/// The spelling used here is the spelling emitted as the entity. Order is
/// significant: matches are emitted in list order and the entity list is
/// capped afterwards, so earlier entries win when many tools are mentioned.
const KNOWN_TOOLS: &[&str] = &[
    // Languages.
    "Rust",
    "TypeScript",
    "JavaScript",
    "Python",
    "Go",
    "Java",
    "Swift",
    // Front-end frameworks.
    "React",
    "Vue",
    "Svelte",
    "Angular",
    "Next.js",
    "Nuxt",
    // Data stores.
    "SQLite",
    "PostgreSQL",
    "MySQL",
    "Redis",
    "MongoDB",
    // Infrastructure and cloud.
    "Docker",
    "Kubernetes",
    "Terraform",
    "AWS",
    "GCP",
    "Azure",
    // Version control and hosting.
    "Git",
    "GitHub",
    "GitLab",
    // Desktop apps and app shells.
    "Obsidian",
    "Tauri",
    "Electron",
    // Rust crates.
    "tantivy",
    "tokio",
    "serde",
    "clap",
    "anyhow",
    // Package managers.
    "npm",
    "cargo",
    "pip",
    "brew",
    // Protocols and API styles.
    "MCP",
    "JSON-RPC",
    "REST",
    "GraphQL",
    "gRPC",
    // Auth and web-security acronyms.
    "OAuth",
    "JWT",
    "CSRF",
    "XSS",
    "CORS",
    // Process and practice acronyms.
    "CI",
    "CD",
    "TDD",
    "BDD",
];
79
/// Lowercase filler words that carry no meaning on their own.
///
/// Words are lowercased before being compared against this list; matches
/// are skipped both when picking capitalized words as entities and when
/// picking trigger words from a title.
const STOP_WORDS: &[&str] = &[
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "to", "of", "in", "for", "on",
    "with", "at", "by", "from", "and", "or", "not", "no", "but", "if", "then", "else", "this",
    "that", "it", "its", "my", "your", "our", "do", "does", "did", "will", "would", "should",
    "could", "have", "has", "had", "can", "may", "might", "use", "using", "used",
];
87
/// Keyword-to-tag rules consulted by `extract_tags`.
///
/// Each entry pairs a list of lowercase keywords with the tag that is
/// suggested when any of those keywords is found in the lowercased record
/// text. Rules are evaluated in the order listed and the resulting tag list
/// is capped, so rules near the top take priority when many rules match.
const TAG_RULES: &[(&[&str], &str)] = &[
    (
        &[
            "database", "db", "sql", "postgres", "sqlite", "mysql", "redis", "mongo",
        ],
        "database",
    ),
    (
        &["test", "spec", "assert", "mock", "tdd", "coverage"],
        "testing",
    ),
    (
        &["auth", "oauth", "jwt", "login", "session", "permission"],
        "auth",
    ),
    (
        &["api", "endpoint", "route", "rest", "graphql", "grpc"],
        "api",
    ),
    (
        &["deploy", "ci", "cd", "pipeline", "docker", "kubernetes"],
        "devops",
    ),
    (
        &["security", "csrf", "xss", "cors", "vulnerability", "secret"],
        "security",
    ),
    (
        &["performance", "cache", "optimize", "latency", "throughput"],
        "performance",
    ),
    (
        &["ui", "frontend", "component", "layout", "style", "css"],
        "frontend",
    ),
    (
        &["config", "setting", "environment", "env", "toml", "yaml"],
        "config",
    ),
    (
        &["error", "exception", "panic", "crash", "bug", "fix"],
        "error-handling",
    ),
    (
        &["refactor", "cleanup", "rename", "restructure", "simplify"],
        "refactoring",
    ),
    (
        &["document", "readme", "comment", "doc", "guide"],
        "documentation",
    ),
];
141
142pub fn enrich_record(record: &MemoryRecord) -> EnrichmentPatch {
145 let text = format!("{} {}", record.title, record.summary);
146 let text_lower = text.to_lowercase();
147
148 let entities = if record.entities.is_empty() {
149 extract_entities(&text)
150 } else {
151 Vec::new()
152 };
153
154 let tags = if record.tags.is_empty() {
155 extract_tags(&text_lower, &record.memory_type)
156 } else {
157 Vec::new()
158 };
159
160 let triggers = if record.triggers.is_empty() {
161 extract_triggers(&record.title)
162 } else {
163 Vec::new()
164 };
165
166 EnrichmentPatch {
167 entities,
168 tags,
169 triggers,
170 }
171}
172
173fn extract_entities(text: &str) -> Vec<String> {
175 let mut entities = Vec::new();
176 let mut seen = std::collections::HashSet::new();
177
178 let text_lower = text.to_lowercase();
180 for tool in KNOWN_TOOLS {
181 let tool_lower = tool.to_lowercase();
182 if text_lower.contains(&tool_lower) && seen.insert(tool_lower) {
183 entities.push(tool.to_string());
184 }
185 }
186
187 for word in text.split_whitespace() {
189 let cleaned = word.trim_matches(|c: char| !c.is_alphanumeric());
190 if cleaned.len() >= 2
191 && cleaned
192 .chars()
193 .next()
194 .map(|c| c.is_uppercase())
195 .unwrap_or(false)
196 && !cleaned
197 .chars()
198 .all(|c| c.is_uppercase() || !c.is_alphabetic())
199 && !STOP_WORDS.contains(&cleaned.to_lowercase().as_str())
200 {
201 let lower = cleaned.to_lowercase();
202 if seen.insert(lower) {
204 entities.push(cleaned.to_string());
205 }
206 }
207 }
208
209 entities.truncate(5);
211 entities
212}
213
214fn extract_tags(text_lower: &str, memory_type: &str) -> Vec<String> {
216 let mut tags = Vec::new();
217
218 match memory_type {
220 "constraint" => tags.push("constraint".to_string()),
221 "decision" => tags.push("decision".to_string()),
222 "incident" => tags.push("incident".to_string()),
223 "workflow" => tags.push("workflow".to_string()),
224 "pattern" => tags.push("pattern".to_string()),
225 _ => {}
226 }
227
228 for (keywords, tag) in TAG_RULES {
230 if keywords.iter().any(|kw| text_lower.contains(kw)) && !tags.contains(&tag.to_string()) {
231 tags.push(tag.to_string());
232 }
233 }
234
235 tags.truncate(4);
237 tags
238}
239
240fn extract_triggers(title: &str) -> Vec<String> {
242 let words: Vec<&str> = title
243 .split_whitespace()
244 .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_'))
245 .filter(|w| w.len() >= 2 && !STOP_WORDS.contains(&w.to_lowercase().as_str()))
246 .take(3)
247 .collect();
248
249 words.iter().map(|w| w.to_lowercase()).collect()
250}
251
// Unit tests for the enrichment heuristics, driven through `enrich_record`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{
        MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope, MemorySourceKind,
    };

    /// Builds a record with the given title, summary, and memory type.
    /// `entities`, `tags`, and `triggers` start empty so that enrichment
    /// fills in all three unless a test sets them first.
    fn make_record(title: &str, summary: &str, memory_type: &str) -> MemoryRecord {
        MemoryRecord {
            title: title.to_string(),
            summary: summary.to_string(),
            memory_type: memory_type.to_string(),
            scope: MemoryScope::User,
            state: MemoryLifecycleState::Accepted,
            origin: MemoryOrigin {
                source_kind: MemorySourceKind::Manual,
                source_ref: "test".to_string(),
            },
            project_id: None,
            user_id: None,
            sensitivity: None,
            entities: Vec::new(),
            tags: Vec::new(),
            triggers: Vec::new(),
            related_files: Vec::new(),
            related_records: Vec::new(),
            supersedes: None,
            applies_to: Vec::new(),
            valid_until: None,
        }
    }

    // Tools named in the title or summary are reported as entities.
    #[test]
    fn enrich_should_extract_known_tools_as_entities() {
        let record = make_record(
            "Use PostgreSQL for persistence",
            "Store lifecycle data in PostgreSQL with Docker for local dev",
            "decision",
        );
        let patch = enrich_record(&record);
        assert!(patch.entities.iter().any(|e| e == "PostgreSQL"));
        assert!(patch.entities.iter().any(|e| e == "Docker"));
    }

    // Tags come from both the memory type and keywords found in the text.
    #[test]
    fn enrich_should_extract_tags_from_keywords() {
        let record = make_record(
            "Database migration strategy",
            "Always use reversible SQL migrations",
            "decision",
        );
        let patch = enrich_record(&record);
        assert!(patch.tags.contains(&"decision".to_string()));
        assert!(patch.tags.contains(&"database".to_string()));
    }

    // Triggers are lowercased words taken from the title.
    #[test]
    fn enrich_should_extract_triggers_from_title() {
        let record = make_record(
            "Prefer immutable data structures",
            "Use immutable patterns to avoid side effects",
            "constraint",
        );
        let patch = enrich_record(&record);
        assert!(!patch.triggers.is_empty());
        assert!(patch.triggers.contains(&"prefer".to_string()));
        assert!(patch.triggers.contains(&"immutable".to_string()));
    }

    // Fields the record already populates are left out of the patch, while
    // fields that are still empty are enriched as usual.
    #[test]
    fn enrich_should_skip_fields_that_already_have_values() {
        let mut record = make_record(
            "Use Rust for CLI",
            "Rust provides good performance",
            "decision",
        );
        record.entities = vec!["Rust".to_string()];
        record.tags = vec!["language".to_string()];
        let patch = enrich_record(&record);
        assert!(
            patch.entities.is_empty(),
            "should skip entities since record already has them"
        );
        assert!(
            patch.tags.is_empty(),
            "should skip tags since record already has them"
        );
        assert!(!patch.triggers.is_empty(), "should still extract triggers");
    }

    // When every enrichable field is already set, the patch is empty.
    #[test]
    fn enrich_empty_patch_should_report_is_empty() {
        let mut record = make_record("x", "y", "preference");
        record.entities = vec!["a".to_string()];
        record.tags = vec!["b".to_string()];
        record.triggers = vec!["c".to_string()];
        let patch = enrich_record(&record);
        assert!(patch.is_empty());
    }
}