lean_ctx/core/knowledge/
persist.rs1use chrono::Utc;
2use std::path::PathBuf;
3
4use super::ranking::hash_project_root;
5use super::types::{ConsolidatedInsight, KnowledgeFact, ProjectKnowledge, ProjectPattern};
6use crate::core::memory_policy::MemoryPolicy;
7
8fn knowledge_dir(project_hash: &str) -> Result<PathBuf, String> {
9 Ok(crate::core::data_dir::lean_ctx_data_dir()?
10 .join("knowledge")
11 .join(project_hash))
12}
13
14impl ProjectKnowledge {
15 pub fn save(&self) -> Result<(), String> {
16 let dir = knowledge_dir(&self.project_hash)?;
17 std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
18 #[cfg(unix)]
19 {
20 use std::os::unix::fs::PermissionsExt;
21 let _ = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700));
22 }
23
24 let path = dir.join("knowledge.json");
25 let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
26 std::fs::write(&path, &json).map_err(|e| e.to_string())?;
27 #[cfg(unix)]
28 {
29 use std::os::unix::fs::PermissionsExt;
30 let _ = std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o600));
31 }
32 Ok(())
33 }
34
35 pub fn load(project_root: &str) -> Option<Self> {
36 let hash = hash_project_root(project_root);
37 let dir = knowledge_dir(&hash).ok()?;
38 let path = dir.join("knowledge.json");
39
40 if let Ok(content) = std::fs::read_to_string(&path) {
41 let size = content.len();
42 if size > 1_000_000 {
43 tracing::warn!(
44 "knowledge.json is large ({:.1} MB) — recall may be slow. \
45 Consider running ctx_knowledge(action=\"consolidate\") to compact it.",
46 size as f64 / 1_048_576.0,
47 );
48 }
49 if let Ok(k) = serde_json::from_str::<Self>(&content) {
50 return Some(k);
51 }
52 }
53
54 let old_hash = crate::core::project_hash::hash_path_only(project_root);
55 if old_hash != hash {
56 crate::core::project_hash::migrate_if_needed(&old_hash, &hash, project_root);
57 if let Ok(content) = std::fs::read_to_string(&path) {
58 if let Ok(mut k) = serde_json::from_str::<Self>(&content) {
59 k.project_hash = hash;
60 let _ = k.save();
61 return Some(k);
62 }
63 }
64 }
65
66 None
67 }
68
69 pub fn load_or_create(project_root: &str) -> Self {
70 Self::load(project_root).unwrap_or_else(|| Self::new(project_root))
71 }
72
73 pub fn migrate_legacy_empty_root(
76 target_root: &str,
77 policy: &MemoryPolicy,
78 ) -> Result<bool, String> {
79 if target_root.trim().is_empty() {
80 return Ok(false);
81 }
82
83 let Some(legacy) = Self::load("") else {
84 return Ok(false);
85 };
86
87 if !legacy.project_root.trim().is_empty() {
88 return Ok(false);
89 }
90 if legacy.facts.is_empty() && legacy.patterns.is_empty() && legacy.history.is_empty() {
91 return Ok(false);
92 }
93
94 let mut target = Self::load_or_create(target_root);
95
96 fn fact_key(f: &KnowledgeFact) -> String {
97 format!(
98 "{}|{}|{}|{}|{}",
99 f.category, f.key, f.value, f.source_session, f.created_at
100 )
101 }
102 fn pattern_key(p: &ProjectPattern) -> String {
103 format!(
104 "{}|{}|{}|{}",
105 p.pattern_type, p.description, p.source_session, p.created_at
106 )
107 }
108 fn history_key(h: &ConsolidatedInsight) -> String {
109 format!(
110 "{}|{}|{}",
111 h.summary,
112 h.from_sessions.join(","),
113 h.timestamp
114 )
115 }
116
117 let mut seen_facts: std::collections::HashSet<String> =
118 target.facts.iter().map(fact_key).collect();
119 for f in legacy.facts {
120 if seen_facts.insert(fact_key(&f)) {
121 target.facts.push(f);
122 }
123 }
124
125 let mut seen_patterns: std::collections::HashSet<String> =
126 target.patterns.iter().map(pattern_key).collect();
127 for p in legacy.patterns {
128 if seen_patterns.insert(pattern_key(&p)) {
129 target.patterns.push(p);
130 }
131 }
132
133 let mut seen_history: std::collections::HashSet<String> =
134 target.history.iter().map(history_key).collect();
135 for h in legacy.history {
136 if seen_history.insert(history_key(&h)) {
137 target.history.push(h);
138 }
139 }
140
141 target.facts.sort_by(|a, b| {
142 b.created_at
143 .cmp(&a.created_at)
144 .then_with(|| b.confidence.total_cmp(&a.confidence))
145 });
146 if target.facts.len() > policy.knowledge.max_facts {
147 target.facts.truncate(policy.knowledge.max_facts);
148 }
149 target
150 .patterns
151 .sort_by_key(|x| std::cmp::Reverse(x.created_at));
152 if target.patterns.len() > policy.knowledge.max_patterns {
153 target.patterns.truncate(policy.knowledge.max_patterns);
154 }
155 target
156 .history
157 .sort_by_key(|x| std::cmp::Reverse(x.timestamp));
158 if target.history.len() > policy.knowledge.max_history {
159 target.history.truncate(policy.knowledge.max_history);
160 }
161
162 target.updated_at = Utc::now();
163 target.save()?;
164
165 let legacy_hash = crate::core::project_hash::hash_path_only("");
166 let legacy_dir = knowledge_dir(&legacy_hash)?;
167 let legacy_path = legacy_dir.join("knowledge.json");
168 if legacy_path.exists() {
169 let ts = Utc::now().format("%Y%m%d-%H%M%S");
170 let backup = legacy_dir.join(format!("knowledge.legacy-empty-root.{ts}.json"));
171 std::fs::rename(&legacy_path, &backup).map_err(|e| e.to_string())?;
172 }
173
174 Ok(true)
175 }
176}