1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
5use super::data_dir::lean_ctx_data_dir;
6
/// Metadata for one archived blob, persisted as a JSON sidecar file
/// (`<id>.meta.json`) next to the content file (`<id>.txt`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveEntry {
    /// Content-addressed id (16 hex chars produced by `compute_id`).
    pub id: String,
    /// Tool name as passed to `store`.
    pub tool: String,
    /// Command string as passed to `store`.
    pub command: String,
    /// Size of the stored content; value is `content.len()`, i.e. bytes —
    /// NOTE(review): field name says chars, confirm intent for non-ASCII.
    pub size_chars: usize,
    /// Token count as reported by `super::tokens::count_tokens`.
    pub size_tokens: usize,
    /// UTC timestamp recorded at store time.
    pub created_at: DateTime<Utc>,
    /// Optional owning session, used for filtering in `list_entries`.
    pub session_id: Option<String>,
}
17
18fn archive_base_dir() -> PathBuf {
19 lean_ctx_data_dir()
20 .unwrap_or_else(|_| PathBuf::from(".lean-ctx"))
21 .join("archives")
22}
23
24fn entry_dir(id: &str) -> PathBuf {
25 let prefix = if id.len() >= 2 { &id[..2] } else { id };
26 archive_base_dir().join(prefix)
27}
28
29fn content_path(id: &str) -> PathBuf {
30 entry_dir(id).join(format!("{id}.txt"))
31}
32
33fn meta_path(id: &str) -> PathBuf {
34 entry_dir(id).join(format!("{id}.meta.json"))
35}
36
/// Derives a content-addressed id: 16 lowercase hex digits of the
/// stdlib default hash of the full content. Identical content always
/// maps to the same id, which is what makes `store` deduplicate.
fn compute_id(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut state = DefaultHasher::new();
    content.hash(&mut state);
    format!("{:016x}", state.finish())
}
45
46pub fn is_enabled() -> bool {
47 if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE") {
48 return !matches!(v.as_str(), "0" | "false" | "off");
49 }
50 super::config::Config::load().archive.enabled
51}
52
53fn threshold_chars() -> usize {
54 if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_THRESHOLD") {
55 if let Ok(n) = v.parse::<usize>() {
56 return n;
57 }
58 }
59 super::config::Config::load().archive.threshold_chars
60}
61
62fn max_age_hours() -> u64 {
63 if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_TTL") {
64 if let Ok(n) = v.parse::<u64>() {
65 return n;
66 }
67 }
68 super::config::Config::load().archive.max_age_hours
69}
70
71pub fn should_archive(content: &str) -> bool {
72 is_enabled() && content.len() >= threshold_chars()
73}
74
75const MAX_ARCHIVE_SIZE: usize = 10 * 1024 * 1024; pub fn store(tool: &str, command: &str, content: &str, session_id: Option<&str>) -> Option<String> {
78 if !is_enabled() || content.is_empty() {
79 return None;
80 }
81
82 let content = if content.len() > MAX_ARCHIVE_SIZE {
83 &content[..MAX_ARCHIVE_SIZE]
84 } else {
85 content
86 };
87
88 let id = compute_id(content);
89 let c_path = content_path(&id);
90
91 if c_path.exists() {
93 return Some(id);
94 }
95
96 let dir = entry_dir(&id);
97 if std::fs::create_dir_all(&dir).is_err() {
98 return None;
99 }
100
101 let pid = std::process::id();
107 let tmp_path = c_path.with_extension(format!("tmp.{pid}"));
108 if std::fs::write(&tmp_path, content).is_err() {
109 return None;
110 }
111 if std::fs::rename(&tmp_path, &c_path).is_err() {
112 let _ = std::fs::remove_file(&tmp_path);
113 if c_path.exists() {
115 return Some(id);
116 }
117 return None;
118 }
119
120 let tokens = super::tokens::count_tokens(content);
121 let entry = ArchiveEntry {
122 id: id.clone(),
123 tool: tool.to_string(),
124 command: command.to_string(),
125 size_chars: content.len(),
126 size_tokens: tokens,
127 created_at: Utc::now(),
128 session_id: session_id.map(|s| s.to_string()),
129 };
130
131 if let Ok(json) = serde_json::to_string_pretty(&entry) {
132 let meta_tmp = meta_path(&id).with_extension(format!("tmp.{pid}"));
133 if std::fs::write(&meta_tmp, &json).is_ok() {
134 let _ = std::fs::rename(&meta_tmp, meta_path(&id));
135 }
136 }
137
138 Some(id)
139}
140
141pub fn retrieve(id: &str) -> Option<String> {
142 let path = content_path(id);
143 std::fs::read_to_string(path).ok()
144}
145
146pub fn retrieve_with_range(id: &str, start: usize, end: usize) -> Option<String> {
147 let content = retrieve(id)?;
148 let lines: Vec<&str> = content.lines().collect();
149 let start = start.saturating_sub(1).min(lines.len());
150 let end = end.min(lines.len());
151 if start >= end {
152 return Some(String::new());
153 }
154 Some(
155 lines[start..end]
156 .iter()
157 .enumerate()
158 .map(|(i, line)| format!("{:>6}|{line}", start + i + 1))
159 .collect::<Vec<_>>()
160 .join("\n"),
161 )
162}
163
164pub fn retrieve_with_search(id: &str, pattern: &str) -> Option<String> {
165 let content = retrieve(id)?;
166 let pattern_lower = pattern.to_lowercase();
167 let matches: Vec<String> = content
168 .lines()
169 .enumerate()
170 .filter(|(_, line)| line.to_lowercase().contains(&pattern_lower))
171 .map(|(i, line)| format!("{:>6}|{line}", i + 1))
172 .collect();
173
174 if matches.is_empty() {
175 Some(format!("No matches for \"{pattern}\" in archive {id}"))
176 } else {
177 Some(format!(
178 "{} match(es) for \"{}\":\n{}",
179 matches.len(),
180 pattern,
181 matches.join("\n")
182 ))
183 }
184}
185
186pub fn list_entries(session_id: Option<&str>) -> Vec<ArchiveEntry> {
187 let base = archive_base_dir();
188 if !base.exists() {
189 return Vec::new();
190 }
191 let mut entries = Vec::new();
192 if let Ok(dirs) = std::fs::read_dir(&base) {
193 for dir_entry in dirs.flatten() {
194 if !dir_entry.path().is_dir() {
195 continue;
196 }
197 if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
198 for file in files.flatten() {
199 let path = file.path();
200 if path.extension().and_then(|e| e.to_str()) != Some("json") {
201 continue;
202 }
203 if let Ok(data) = std::fs::read_to_string(&path) {
204 if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
205 if let Some(sid) = session_id {
206 if entry.session_id.as_deref() != Some(sid) {
207 continue;
208 }
209 }
210 entries.push(entry);
211 }
212 }
213 }
214 }
215 }
216 }
217 entries.sort_by(|a, b| b.created_at.cmp(&a.created_at));
218 entries
219}
220
221pub fn cleanup() -> u32 {
222 let max_hours = max_age_hours();
223 let cutoff = Utc::now() - chrono::Duration::hours(max_hours as i64);
224 let base = archive_base_dir();
225 if !base.exists() {
226 return 0;
227 }
228 let mut removed = 0u32;
229 if let Ok(dirs) = std::fs::read_dir(&base) {
230 for dir_entry in dirs.flatten() {
231 if !dir_entry.path().is_dir() {
232 continue;
233 }
234 if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
235 for file in files.flatten() {
236 let path = file.path();
237 if path.extension().and_then(|e| e.to_str()) != Some("json") {
238 continue;
239 }
240 if let Ok(data) = std::fs::read_to_string(&path) {
241 if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
242 if entry.created_at < cutoff {
243 let c = content_path(&entry.id);
244 let _ = std::fs::remove_file(&c);
245 let _ = std::fs::remove_file(&path);
246 removed += 1;
247 }
248 }
249 }
250 }
251 }
252 }
253 }
254 removed
255}
256
257pub fn disk_usage_bytes() -> u64 {
258 let base = archive_base_dir();
259 if !base.exists() {
260 return 0;
261 }
262 let mut total = 0u64;
263 if let Ok(dirs) = std::fs::read_dir(&base) {
264 for dir_entry in dirs.flatten() {
265 if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
266 for file in files.flatten() {
267 total += file.metadata().map(|m| m.len()).unwrap_or(0);
268 }
269 }
270 }
271 }
272 total
273}
274
/// Builds the inline placeholder shown in place of archived content,
/// telling the reader how to retrieve the full text via `ctx_expand`.
pub fn format_hint(id: &str, size_chars: usize, size_tokens: usize) -> String {
    format!(
        "[Archived: {} chars ({} tok). Retrieve: ctx_expand(id=\"{}\")]",
        size_chars, size_tokens, id
    )
}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal input must always hash to the same id; different input must not.
    #[test]
    fn compute_id_deterministic() {
        assert_eq!(compute_id("test content"), compute_id("test content"));
        assert_ne!(compute_id("test content"), compute_id("different content"));
    }

    /// Looking up an id that was never stored yields `None`, not a panic.
    #[test]
    fn nonexistent_id_returns_none() {
        assert!(retrieve("nonexistent_archive_id_xyz").is_none());
    }

    /// The hint embeds the sizes, the retrieval call, and the id verbatim.
    #[test]
    fn format_hint_readable() {
        let hint = format_hint("abc123", 5000, 1200);
        for expected in ["5000 chars", "1200 tok", "ctx_expand", "abc123"] {
            assert!(hint.contains(expected));
        }
    }
}
305}