Skip to main content

matrixcode_core/memory/
keywords_config.rs

1//! Keywords configuration for memory detection.
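//! Loads from `~/.matrix/keywords.json` first, then from `.matrix/keywords.json` in the
//! current working directory, and otherwise falls back to the embedded defaults.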
3
4use std::collections::{HashMap, HashSet};
5use std::env::var_os;
6use std::path::PathBuf;
7
8use serde::{Deserialize, Serialize};
9
10use super::types::MemoryCategory;
11
12// ============================================================================
13// Helper Functions
14// ============================================================================
15
/// Resolve the user's home directory from the environment.
///
/// `HOME` is consulted first and `USERPROFILE` second; the first variable that is set wins.
/// Returns `None` when neither is set.
fn home_dir() -> Option<PathBuf> {
    ["HOME", "USERPROFILE"]
        .iter()
        .find_map(|name| var_os(name))
        .map(PathBuf::from)
}
22
23// ============================================================================
24// Embedded Default Keywords (fallback when no config file)
25// ============================================================================
26
27/// Get embedded default keywords configuration.
28pub fn get_default_keywords() -> KeywordsConfig {
29    KeywordsConfig {
30        version: 1,
31        patterns: default_patterns(),
32        stop_words: default_stop_words(),
33        semantic_aliases: default_aliases(),
34        contradiction_signals: default_contradiction_signals(),
35        tech_keywords: default_tech_keywords(),
36    }
37}
38
/// Built-in trigger phrases, grouped by memory category.
///
/// The map is keyed by the lowercase category name (`decision`, `preference`, `solution`,
/// `finding`, `technical`, `structure`); each value lists that category's Chinese and
/// English phrases in a fixed order.
fn default_patterns() -> HashMap<String, Vec<String>> {
    const TABLE: [(&str, &[&str]); 6] = [
        (
            "decision",
            &[
                "最终决定",
                "决定采用",
                "我们决定",
                "选择使用",
                "采用方案",
                "定下来",
                "就定这个",
                "敲定",
                "拍板",
                "we decided",
                "final decision",
                "decided to",
                "chose to",
            ],
        ),
        (
            "preference",
            &[
                "我喜欢",
                "我偏好",
                "我习惯",
                "最常用",
                "一直用",
                "推荐",
                "建议使用",
                "首选",
                "prefer",
                "i like",
                "i prefer",
                "my favorite",
            ],
        ),
        (
            "solution",
            &[
                "通过修改",
                "解决方案是",
                "搞定",
                "解决了",
                "修复成功",
                "改成",
                "优化了",
                "fixed by",
                "solved by",
                "resolved",
            ],
        ),
        (
            "finding",
            &[
                "发现",
                "注意到",
                "原来",
                "找到问题",
                "定位到",
                "排查发现",
                "原因是",
                "found that",
                "discovered",
                "the reason is",
            ],
        ),
        (
            "technical",
            &[
                "技术栈是",
                "框架使用",
                "用的是",
                "基于",
                "tech stack",
                "using framework",
                "built with",
                "powered by",
            ],
        ),
        (
            "structure",
            &[
                "入口文件是",
                "主文件位于",
                "项目结构是",
                "入口是",
                "目录是",
                "entry point",
                "main file",
                "located at",
            ],
        ),
    ];

    TABLE
        .iter()
        .map(|(category, phrases)| {
            let owned: Vec<String> = phrases.iter().map(|phrase| phrase.to_string()).collect();
            (category.to_string(), owned)
        })
        .collect()
}
134
135fn default_stop_words() -> StopWordsConfig {
136    StopWordsConfig {
137        chinese: vec![
138            "的", "了", "是", "在", "我", "有", "和", "就", "不", "人", "都", "一", "一个", "上",
139            "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好", "自己", "这",
140            "他", "她", "它", "们", "那", "些", "什么", "怎么", "如何", "请", "能", "可以", "需要",
141            "应该", "可能", "因为", "所以", "但是", "然后", "还是", "已经", "正在", "将要", "曾经",
142            "一下", "一点", "一些", "所有", "每个", "任何",
143        ]
144        .into_iter()
145        .map(|s| s.to_string())
146        .collect(),
147        english: vec![
148            "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has",
149            "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "can",
150            "shall", "to", "of", "in", "for", "on", "with", "at", "by", "from", "as", "into",
151            "through", "during", "before", "after", "above", "below", "between", "and", "but",
152            "or", "not", "no", "so", "if", "then", "than", "too", "very", "just", "this", "that",
153            "these", "those", "it", "its", "i", "me", "my", "we", "our", "you", "your", "he",
154            "his", "she", "her", "they", "their", "please", "help", "need", "want", "make", "get",
155            "let", "use",
156        ]
157        .into_iter()
158        .map(|s| s.to_string())
159        .collect(),
160    }
161}
162
/// Built-in semantic alias table.
///
/// Each entry is an `[alias, target]` pair; several aliases may share one target
/// (for example both `数据库` and `db` map to `database`).
fn default_aliases() -> Vec<[String; 2]> {
    const PAIRS: &[(&str, &str)] = &[
        ("数据库", "database"),
        ("db", "database"),
        ("前端", "frontend"),
        ("ui", "frontend"),
        ("界面", "frontend"),
        ("后端", "backend"),
        ("api", "api"),
        ("接口", "api"),
        ("服务", "service"),
        ("服务器", "server"),
        ("配置", "config"),
        ("设置", "setting"),
        ("目录", "directory"),
        ("文件", "file"),
        ("路径", "path"),
        ("测试", "test"),
        ("缓存", "cache"),
        ("认证", "auth"),
        ("登录", "login"),
        ("性能", "performance"),
        ("优化", "optimize"),
        ("创建", "create"),
        ("删除", "delete"),
        ("修改", "modify"),
        ("添加", "add"),
        ("更新", "update"),
        ("查询", "query"),
    ];

    PAIRS
        .iter()
        .map(|&(alias, target)| [alias.to_string(), target.to_string()])
        .collect()
}
194
/// Built-in phrases (Chinese and English) that signal a change or reversal,
/// such as switching, replacing, or abandoning something.
fn default_contradiction_signals() -> Vec<String> {
    const SIGNALS: &[&str] = &[
        "改用",
        "换成",
        "替换",
        "改为",
        "切换到",
        "迁移到",
        "不再使用",
        "弃用",
        "放弃",
        "取消",
        "switched to",
        "replaced",
        "migrated to",
        "changed to",
        "no longer",
        "deprecated",
        "abandoned",
    ];

    SIGNALS.iter().map(|signal| String::from(*signal)).collect()
}
216
/// Built-in list of short, lowercase technical keywords used for extraction.
fn default_tech_keywords() -> Vec<String> {
    const KEYWORDS: &[&str] = &[
        "api", "cli", "gui", "tui", "web", "http", "json", "xml", "sql", "db", "git", "npm",
        "cargo", "rust", "js", "ts", "py", "go", "java", "cpp", "cpu", "gpu", "io", "fs", "os",
        "ux", "ai", "ml", "dl", "yaml", "yml", "toml", "md", "txt", "html", "css", "scss", "bug",
        "fix", "code", "data",
    ];

    KEYWORDS.iter().map(|keyword| String::from(*keyword)).collect()
}
262
263// ============================================================================
264// Configuration Structures
265// ============================================================================
266
267/// Keywords configuration for memory detection.
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct KeywordsConfig {
270    /// Configuration version.
271    pub version: u32,
272    /// Category detection patterns.
273    pub patterns: HashMap<String, Vec<String>>,
274    /// Stop words for keyword filtering.
275    pub stop_words: StopWordsConfig,
276    /// Semantic alias mappings.
277    pub semantic_aliases: Vec<[String; 2]>,
278    /// Contradiction/change signal words.
279    pub contradiction_signals: Vec<String>,
280    /// Technical keywords for extraction.
281    pub tech_keywords: Vec<String>,
282}
283
284/// Stop words configuration.
285#[derive(Debug, Clone, Serialize, Deserialize)]
286pub struct StopWordsConfig {
287    /// Chinese stop words.
288    pub chinese: Vec<String>,
289    /// English stop words.
290    pub english: Vec<String>,
291}
292
293impl KeywordsConfig {
294    /// Load configuration from file or use defaults.
295    pub fn load() -> Self {
296        // Try user config directory first
297        if let Some(home) = home_dir() {
298            let config_path = home.join(".matrix").join("keywords.json");
299            if config_path.exists()
300                && let Ok(content) = std::fs::read_to_string(&config_path)
301            {
302                if let Ok(config) = serde_json::from_str::<Self>(&content) {
303                    log::info!("Loaded keywords config from {}", config_path.display());
304                    return config;
305                } else {
306                    log::warn!(
307                        "Failed to parse keywords config from {}, using defaults",
308                        config_path.display()
309                    );
310                }
311            }
312        }
313
314        // Try project config directory
315        if let Ok(cwd) = std::env::current_dir() {
316            let project_config = cwd.join(".matrix").join("keywords.json");
317            if project_config.exists()
318                && let Ok(content) = std::fs::read_to_string(&project_config)
319                && let Ok(config) = serde_json::from_str::<Self>(&content)
320            {
321                log::info!("Loaded keywords config from {}", project_config.display());
322                return config;
323            }
324        }
325
326        // Use embedded defaults
327        get_default_keywords()
328    }
329
330    /// Get patterns for a specific category.
331    pub fn get_patterns(&self, category: MemoryCategory) -> &[String] {
332        let key = match category {
333            MemoryCategory::Decision => "decision",
334            MemoryCategory::Preference => "preference",
335            MemoryCategory::Solution => "solution",
336            MemoryCategory::Finding => "finding",
337            MemoryCategory::Technical => "technical",
338            MemoryCategory::Structure => "structure",
339        };
340        self.patterns.get(key).map(|v| v.as_slice()).unwrap_or(&[])
341    }
342
343    /// Get all stop words as a set.
344    pub fn get_stop_words_set(&self) -> HashSet<&str> {
345        self.stop_words
346            .chinese
347            .iter()
348            .chain(self.stop_words.english.iter())
349            .map(|s| s.as_str())
350            .collect()
351    }
352
353    /// Get tech keywords as a set.
354    pub fn get_tech_keywords_set(&self) -> HashSet<&str> {
355        self.tech_keywords.iter().map(|s| s.as_str()).collect()
356    }
357
358    /// Get semantic aliases as tuples.
359    pub fn get_aliases(&self) -> Vec<(&str, &str)> {
360        self.semantic_aliases
361            .iter()
362            .map(|pair| (pair[0].as_str(), pair[1].as_str()))
363            .collect()
364    }
365
366    /// Save default config to user directory (for customization).
367    pub fn save_default_to_user_dir() -> anyhow::Result<PathBuf> {
368        if let Some(home) = home_dir() {
369            let config_dir = home.join(".matrix");
370            std::fs::create_dir_all(&config_dir)?;
371            let config_path = config_dir.join("keywords.json");
372            let default = get_default_keywords();
373            let content = serde_json::to_string_pretty(&default)?;
374            std::fs::write(&config_path, content)?;
375            log::info!("Saved default keywords config to {}", config_path.display());
376            return Ok(config_path);
377        }
378        anyhow::bail!("Could not determine home directory")
379    }
380
381    /// Add new tech keywords (from AI extraction).
382    /// Returns true if any new keywords were added.
383    pub fn add_keywords(&mut self, new_keywords: &[String]) -> bool {
384        let mut added = false;
385        for kw in new_keywords {
386            let kw_lower = kw.to_lowercase();
387            if !self
388                .tech_keywords
389                .iter()
390                .any(|t| t.to_lowercase() == kw_lower)
391            {
392                self.tech_keywords.push(kw.clone());
393                added = true;
394                log::debug!("Added new keyword: {}", kw);
395            }
396        }
397        added
398    }
399
400    /// Add new semantic alias (from AI extraction).
401    /// Returns true if the alias was added.
402    pub fn add_alias(&mut self, alias: &str, target: &str) -> bool {
403        // Check if alias already exists
404        for pair in &self.semantic_aliases {
405            if pair[0] == alias || pair[1] == target {
406                return false;
407            }
408        }
409        self.semantic_aliases
410            .push([alias.to_string(), target.to_string()]);
411        log::debug!("Added new alias: {} -> {}", alias, target);
412        true
413    }
414
415    /// Add new pattern for a category (from AI extraction).
416    /// Returns true if the pattern was added.
417    pub fn add_pattern(&mut self, category: &str, pattern: &str) -> bool {
418        let patterns = self.patterns.get_mut(category);
419        if let Some(list) = patterns {
420            let pattern_lower = pattern.to_lowercase();
421            if !list.iter().any(|p| p.to_lowercase() == pattern_lower) {
422                list.push(pattern.to_string());
423                log::debug!("Added new pattern for {}: {}", category, pattern);
424                return true;
425            }
426        }
427        false
428    }
429
430    /// Save config to user directory (~/.matrix/keywords.json).
431    pub fn save(&self) -> anyhow::Result<PathBuf> {
432        if let Some(home) = home_dir() {
433            let config_dir = home.join(".matrix");
434            std::fs::create_dir_all(&config_dir)?;
435            let config_path = config_dir.join("keywords.json");
436            let content = serde_json::to_string_pretty(self)?;
437            std::fs::write(&config_path, content)?;
438            log::info!("Saved keywords config to {}", config_path.display());
439            return Ok(config_path);
440        }
441        anyhow::bail!("Could not determine home directory")
442    }
443
444    /// Load, update with new keywords, and save.
445    /// Used by AI to automatically expand the keyword library.
446    pub fn update_and_save(
447        new_keywords: &[String],
448        new_aliases: Option<&[(String, String)]>,
449    ) -> anyhow::Result<PathBuf> {
450        let mut config = Self::load();
451        let mut changed = config.add_keywords(new_keywords);
452
453        if let Some(aliases) = new_aliases {
454            for (alias, target) in aliases {
455                changed |= config.add_alias(alias, target);
456            }
457        }
458
459        if changed {
460            config.save()
461        } else {
462            // Return path even if not saved
463            home_dir()
464                .map(|h| h.join(".matrix").join("keywords.json"))
465                .ok_or_else(|| anyhow::anyhow!("Could not determine home directory"))
466        }
467    }
468}
469
470impl Default for KeywordsConfig {
471    fn default() -> Self {
472        Self::load()
473    }
474}