Skip to main content

matrixcode_core/memory/
keywords_config.rs

//! Keywords configuration for memory detection.
//! Loads from `~/.matrix/keywords.json`, then `./.matrix/keywords.json` in the
//! current directory, and otherwise falls back to embedded defaults.
3
4use std::collections::{HashMap, HashSet};
5use std::env::var_os;
6use std::path::PathBuf;
7
8use serde::{Deserialize, Serialize};
9
10use super::types::MemoryCategory;
11
12// ============================================================================
13// Helper Functions
14// ============================================================================
15
/// Resolve the current user's home directory from the environment.
///
/// `HOME` is consulted first; `USERPROFILE` is the fallback. Returns `None`
/// when neither variable is set.
fn home_dir() -> Option<PathBuf> {
    let raw = match var_os("HOME") {
        Some(home) => Some(home),
        None => var_os("USERPROFILE"),
    };
    raw.map(PathBuf::from)
}
22
23// ============================================================================
24// Embedded Default Keywords (fallback when no config file)
25// ============================================================================
26
27/// Get embedded default keywords configuration.
28pub fn get_default_keywords() -> KeywordsConfig {
29    KeywordsConfig {
30        version: 1,
31        patterns: default_patterns(),
32        stop_words: default_stop_words(),
33        semantic_aliases: default_aliases(),
34        contradiction_signals: default_contradiction_signals(),
35        tech_keywords: default_tech_keywords(),
36    }
37}
38
/// Built-in trigger phrases, keyed by category name.
///
/// Each category (`decision`, `preference`, `solution`, `finding`,
/// `technical`, `structure`) maps to a list of Chinese and English phrases.
/// The phrases are kept in a static table and converted to owned strings on
/// each call.
fn default_patterns() -> HashMap<String, Vec<String>> {
    const TABLE: &[(&str, &[&str])] = &[
        (
            "decision",
            &[
                "最终决定",
                "决定采用",
                "我们决定",
                "选择使用",
                "采用方案",
                "定下来",
                "就定这个",
                "敲定",
                "拍板",
                "we decided",
                "final decision",
                "decided to",
                "chose to",
            ],
        ),
        (
            "preference",
            &[
                "我喜欢",
                "我偏好",
                "我习惯",
                "最常用",
                "一直用",
                "推荐",
                "建议使用",
                "首选",
                "prefer",
                "i like",
                "i prefer",
                "my favorite",
            ],
        ),
        (
            "solution",
            &[
                "通过修改",
                "解决方案是",
                "搞定",
                "解决了",
                "修复成功",
                "改成",
                "优化了",
                "fixed by",
                "solved by",
                "resolved",
            ],
        ),
        (
            "finding",
            &[
                "发现",
                "注意到",
                "原来",
                "找到问题",
                "定位到",
                "排查发现",
                "原因是",
                "found that",
                "discovered",
                "the reason is",
            ],
        ),
        (
            "technical",
            &[
                "技术栈是",
                "框架使用",
                "用的是",
                "基于",
                "tech stack",
                "using framework",
                "built with",
                "powered by",
            ],
        ),
        (
            "structure",
            &[
                "入口文件是",
                "主文件位于",
                "项目结构是",
                "入口是",
                "目录是",
                "entry point",
                "main file",
                "located at",
            ],
        ),
    ];

    let mut by_category = HashMap::with_capacity(TABLE.len());
    for &(category, phrases) in TABLE {
        let owned: Vec<String> = phrases.iter().map(|&phrase| String::from(phrase)).collect();
        by_category.insert(String::from(category), owned);
    }
    by_category
}
134
135fn default_stop_words() -> StopWordsConfig {
136    StopWordsConfig {
137        chinese: vec![
138            "的", "了", "是", "在", "我", "有", "和", "就", "不", "人", "都", "一", "一个", "上",
139            "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好", "自己", "这",
140            "他", "她", "它", "们", "那", "些", "什么", "怎么", "如何", "请", "能", "可以", "需要",
141            "应该", "可能", "因为", "所以", "但是", "然后", "还是", "已经", "正在", "将要", "曾经",
142            "一下", "一点", "一些", "所有", "每个", "任何",
143        ]
144        .into_iter()
145        .map(|s| s.to_string())
146        .collect(),
147        english: vec![
148            "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has",
149            "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can",
150            "shall", "to", "of", "in", "for", "on", "with", "at", "by", "from", "as", "into",
151            "through", "during", "before", "after", "above", "below", "between", "and", "but",
152            "or", "not", "no", "so", "if", "then", "than", "too", "very", "just", "this", "that",
153            "these", "those", "it", "its", "i", "me", "my", "we", "our", "you", "your", "he",
154            "his", "she", "her", "they", "their", "please", "help", "need", "want", "make", "get",
155            "let", "use",
156        ]
157        .into_iter()
158        .map(|s| s.to_string())
159        .collect(),
160    }
161}
162
/// Built-in semantic alias pairs, each stored as `[alias, target]`.
///
/// Several aliases may share one target (for example both `数据库` and `db`
/// map to `database`).
fn default_aliases() -> Vec<[String; 2]> {
    const PAIRS: &[(&str, &str)] = &[
        ("数据库", "database"),
        ("db", "database"),
        ("前端", "frontend"),
        ("ui", "frontend"),
        ("界面", "frontend"),
        ("后端", "backend"),
        ("api", "api"),
        ("接口", "api"),
        ("服务", "service"),
        ("服务器", "server"),
        ("配置", "config"),
        ("设置", "setting"),
        ("目录", "directory"),
        ("文件", "file"),
        ("路径", "path"),
        ("测试", "test"),
        ("缓存", "cache"),
        ("认证", "auth"),
        ("登录", "login"),
        ("性能", "performance"),
        ("优化", "optimize"),
        ("创建", "create"),
        ("删除", "delete"),
        ("修改", "modify"),
        ("添加", "add"),
        ("更新", "update"),
        ("查询", "query"),
    ];

    PAIRS
        .iter()
        .map(|&(alias, target)| [alias.to_owned(), target.to_owned()])
        .collect()
}
194
/// Built-in phrases (Chinese and English) that signal a change or reversal,
/// such as "switched to" or "no longer".
fn default_contradiction_signals() -> Vec<String> {
    const SIGNALS: [&str; 17] = [
        "改用",
        "换成",
        "替换",
        "改为",
        "切换到",
        "迁移到",
        "不再使用",
        "弃用",
        "放弃",
        "取消",
        "switched to",
        "replaced",
        "migrated to",
        "changed to",
        "no longer",
        "deprecated",
        "abandoned",
    ];

    SIGNALS.iter().copied().map(String::from).collect()
}
216
/// Built-in list of short technical keywords (all lowercase), returned in a
/// fixed order as owned strings.
fn default_tech_keywords() -> Vec<String> {
    const KEYWORDS: [&str; 41] = [
        "api", "cli", "gui", "tui", "web", "http", "json", "xml", "sql", "db", "git", "npm",
        "cargo", "rust", "js", "ts", "py", "go", "java", "cpp", "cpu", "gpu", "io", "fs", "os",
        "ux", "ai", "ml", "dl", "yaml", "yml", "toml", "md", "txt", "html", "css", "scss", "bug",
        "fix", "code", "data",
    ];

    let mut keywords = Vec::with_capacity(KEYWORDS.len());
    for keyword in KEYWORDS {
        keywords.push(String::from(keyword));
    }
    keywords
}
262
263// ============================================================================
264// Configuration Structures
265// ============================================================================
266
267/// Keywords configuration for memory detection.
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct KeywordsConfig {
270    /// Configuration version.
271    pub version: u32,
272    /// Category detection patterns.
273    pub patterns: HashMap<String, Vec<String>>,
274    /// Stop words for keyword filtering.
275    pub stop_words: StopWordsConfig,
276    /// Semantic alias mappings.
277    pub semantic_aliases: Vec<[String; 2]>,
278    /// Contradiction/change signal words.
279    pub contradiction_signals: Vec<String>,
280    /// Technical keywords for extraction.
281    pub tech_keywords: Vec<String>,
282}
283
284/// Stop words configuration.
285#[derive(Debug, Clone, Serialize, Deserialize)]
286pub struct StopWordsConfig {
287    /// Chinese stop words.
288    pub chinese: Vec<String>,
289    /// English stop words.
290    pub english: Vec<String>,
291}
292
293impl KeywordsConfig {
294    /// Load configuration from file or use defaults.
295    pub fn load() -> Self {
296        // Try user config directory first
297        if let Some(home) = home_dir() {
298            let config_path = home.join(".matrix").join("keywords.json");
299            if config_path.exists()
300                && let Ok(content) = std::fs::read_to_string(&config_path)
301            {
302                if let Ok(config) = serde_json::from_str::<Self>(&content) {
303                    log::info!("Loaded keywords config from {}", config_path.display());
304                    return config;
305                } else {
306                    log::warn!(
307                        "Failed to parse keywords config from {}, using defaults",
308                        config_path.display()
309                    );
310                }
311            }
312        }
313
314        // Try project config directory
315        if let Ok(cwd) = std::env::current_dir() {
316            let project_config = cwd.join(".matrix").join("keywords.json");
317            if project_config.exists()
318                && let Ok(content) = std::fs::read_to_string(&project_config)
319                && let Ok(config) = serde_json::from_str::<Self>(&content)
320            {
321                log::info!("Loaded keywords config from {}", project_config.display());
322                return config;
323            }
324        }
325
326        // Use embedded defaults
327        get_default_keywords()
328    }
329
330    /// Get patterns for a specific category.
331    pub fn get_patterns(&self, category: MemoryCategory) -> &[String] {
332        let key = match category {
333            MemoryCategory::Decision => "decision",
334            MemoryCategory::Preference => "preference",
335            MemoryCategory::Solution => "solution",
336            MemoryCategory::Finding => "finding",
337            MemoryCategory::Technical => "technical",
338            MemoryCategory::Structure => "structure",
339            // New categories - use same patterns as parent categories
340            MemoryCategory::KeyDecision => "decision",
341            MemoryCategory::FailedApproach => "solution",
342            MemoryCategory::UserIntentPattern => "preference",
343            MemoryCategory::TaskPattern => "solution",
344        };
345        self.patterns.get(key).map(|v| v.as_slice()).unwrap_or(&[])
346    }
347
348    /// Get all stop words as a set.
349    pub fn get_stop_words_set(&self) -> HashSet<&str> {
350        self.stop_words
351            .chinese
352            .iter()
353            .chain(self.stop_words.english.iter())
354            .map(|s| s.as_str())
355            .collect()
356    }
357
358    /// Get tech keywords as a set.
359    pub fn get_tech_keywords_set(&self) -> HashSet<&str> {
360        self.tech_keywords.iter().map(|s| s.as_str()).collect()
361    }
362
363    /// Get semantic aliases as tuples.
364    pub fn get_aliases(&self) -> Vec<(&str, &str)> {
365        self.semantic_aliases
366            .iter()
367            .map(|pair| (pair[0].as_str(), pair[1].as_str()))
368            .collect()
369    }
370
371    /// Save default config to user directory (for customization).
372    pub fn save_default_to_user_dir() -> anyhow::Result<PathBuf> {
373        if let Some(home) = home_dir() {
374            let config_dir = home.join(".matrix");
375            std::fs::create_dir_all(&config_dir)?;
376            let config_path = config_dir.join("keywords.json");
377            let default = get_default_keywords();
378            let content = serde_json::to_string_pretty(&default)?;
379            std::fs::write(&config_path, content)?;
380            log::info!("Saved default keywords config to {}", config_path.display());
381            return Ok(config_path);
382        }
383        anyhow::bail!("Could not determine home directory")
384    }
385
386    /// Add new tech keywords (from AI extraction).
387    /// Returns true if any new keywords were added.
388    pub fn add_keywords(&mut self, new_keywords: &[String]) -> bool {
389        let mut added = false;
390        for kw in new_keywords {
391            let kw_lower = kw.to_lowercase();
392            if !self
393                .tech_keywords
394                .iter()
395                .any(|t| t.to_lowercase() == kw_lower)
396            {
397                self.tech_keywords.push(kw.clone());
398                added = true;
399                log::debug!("Added new keyword: {}", kw);
400            }
401        }
402        added
403    }
404
405    /// Add new semantic alias (from AI extraction).
406    /// Returns true if the alias was added.
407    pub fn add_alias(&mut self, alias: &str, target: &str) -> bool {
408        // Check if alias already exists
409        for pair in &self.semantic_aliases {
410            if pair[0] == alias || pair[1] == target {
411                return false;
412            }
413        }
414        self.semantic_aliases
415            .push([alias.to_string(), target.to_string()]);
416        log::debug!("Added new alias: {} -> {}", alias, target);
417        true
418    }
419
420    /// Add new pattern for a category (from AI extraction).
421    /// Returns true if the pattern was added.
422    pub fn add_pattern(&mut self, category: &str, pattern: &str) -> bool {
423        let patterns = self.patterns.get_mut(category);
424        if let Some(list) = patterns {
425            let pattern_lower = pattern.to_lowercase();
426            if !list.iter().any(|p| p.to_lowercase() == pattern_lower) {
427                list.push(pattern.to_string());
428                log::debug!("Added new pattern for {}: {}", category, pattern);
429                return true;
430            }
431        }
432        false
433    }
434
435    /// Save config to user directory (~/.matrix/keywords.json).
436    pub fn save(&self) -> anyhow::Result<PathBuf> {
437        if let Some(home) = home_dir() {
438            let config_dir = home.join(".matrix");
439            std::fs::create_dir_all(&config_dir)?;
440            let config_path = config_dir.join("keywords.json");
441            let content = serde_json::to_string_pretty(self)?;
442            std::fs::write(&config_path, content)?;
443            log::info!("Saved keywords config to {}", config_path.display());
444            return Ok(config_path);
445        }
446        anyhow::bail!("Could not determine home directory")
447    }
448
449    /// Load, update with new keywords, and save.
450    /// Used by AI to automatically expand the keyword library.
451    pub fn update_and_save(
452        new_keywords: &[String],
453        new_aliases: Option<&[(String, String)]>,
454    ) -> anyhow::Result<PathBuf> {
455        let mut config = Self::load();
456        let mut changed = config.add_keywords(new_keywords);
457
458        if let Some(aliases) = new_aliases {
459            for (alias, target) in aliases {
460                changed |= config.add_alias(alias, target);
461            }
462        }
463
464        if changed {
465            config.save()
466        } else {
467            // Return path even if not saved
468            home_dir()
469                .map(|h| h.join(".matrix").join("keywords.json"))
470                .ok_or_else(|| anyhow::anyhow!("Could not determine home directory"))
471        }
472    }
473}
474
475impl Default for KeywordsConfig {
476    fn default() -> Self {
477        Self::load()
478    }
479}