// lean_ctx/core/archive.rs

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;

use super::data_dir::lean_ctx_data_dir;
6
/// Metadata describing one archived tool output.
///
/// Serialized as pretty JSON to `{id}.meta.json` next to the archived
/// content file `{id}.txt`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveEntry {
    // Content-hash id (16 lowercase hex chars) that names the on-disk files.
    pub id: String,
    // Tool that produced the archived output.
    pub tool: String,
    // Command/invocation that generated the output.
    pub command: String,
    // Size of the archived content; set from `content.len()`, so this is a
    // byte count despite the name (equals char count only for ASCII).
    pub size_chars: usize,
    // Token count as reported by super::tokens::count_tokens.
    pub size_tokens: usize,
    // UTC archival timestamp; compared against the TTL cutoff in cleanup().
    pub created_at: DateTime<Utc>,
    // Owning session, if any; list_entries() can filter on this.
    pub session_id: Option<String>,
}
17
18fn archive_base_dir() -> PathBuf {
19    lean_ctx_data_dir()
20        .unwrap_or_else(|_| PathBuf::from(".lean-ctx"))
21        .join("archives")
22}
23
24fn entry_dir(id: &str) -> PathBuf {
25    let prefix = if id.len() >= 2 { &id[..2] } else { id };
26    archive_base_dir().join(prefix)
27}
28
29fn content_path(id: &str) -> PathBuf {
30    entry_dir(id).join(format!("{id}.txt"))
31}
32
33fn meta_path(id: &str) -> PathBuf {
34    entry_dir(id).join(format!("{id}.meta.json"))
35}
36
/// Derives the archive id for `content`: the 64-bit std hash rendered as
/// 16 zero-padded lowercase hex digits.
///
/// NOTE(review): `DefaultHasher::new()` is deterministic within a single std
/// release, but the algorithm is explicitly not guaranteed stable across Rust
/// versions — after a toolchain upgrade the same content may get a new id and
/// be archived a second time. Confirm that is acceptable before treating ids
/// as long-term stable keys.
fn compute_id(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut hasher = DefaultHasher::new();
    content.hash(&mut hasher);
    format!("{:016x}", hasher.finish())
}
45
46pub fn is_enabled() -> bool {
47    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE") {
48        return !matches!(v.as_str(), "0" | "false" | "off");
49    }
50    super::config::Config::load().archive.enabled
51}
52
53fn threshold_chars() -> usize {
54    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_THRESHOLD") {
55        if let Ok(n) = v.parse::<usize>() {
56            return n;
57        }
58    }
59    super::config::Config::load().archive.threshold_chars
60}
61
62fn max_age_hours() -> u64 {
63    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_TTL") {
64        if let Ok(n) = v.parse::<u64>() {
65            return n;
66        }
67    }
68    super::config::Config::load().archive.max_age_hours
69}
70
71pub fn should_archive(content: &str) -> bool {
72    is_enabled() && content.len() >= threshold_chars()
73}
74
75const MAX_ARCHIVE_SIZE: usize = 10 * 1024 * 1024; // 10 MB
76
77pub fn store(tool: &str, command: &str, content: &str, session_id: Option<&str>) -> Option<String> {
78    if !is_enabled() || content.is_empty() {
79        return None;
80    }
81
82    let content = if content.len() > MAX_ARCHIVE_SIZE {
83        &content[..MAX_ARCHIVE_SIZE]
84    } else {
85        content
86    };
87
88    let id = compute_id(content);
89    let c_path = content_path(&id);
90
91    // Fast path: content already archived (idempotent, no race)
92    if c_path.exists() {
93        return Some(id);
94    }
95
96    let dir = entry_dir(&id);
97    if std::fs::create_dir_all(&dir).is_err() {
98        return None;
99    }
100
101    // Atomic write: PID-unique tmp file prevents race between parallel writers.
102    // rename() is atomic on POSIX; on Windows it replaces atomically too.
103    // If two processes race past the exists() check, both write their own tmp
104    // file and both rename to the same target — last writer wins, content is
105    // identical (same hash), so the result is correct either way.
106    let pid = std::process::id();
107    let tmp_path = c_path.with_extension(format!("tmp.{pid}"));
108    if std::fs::write(&tmp_path, content).is_err() {
109        return None;
110    }
111    if std::fs::rename(&tmp_path, &c_path).is_err() {
112        let _ = std::fs::remove_file(&tmp_path);
113        // Another process may have won the race — check if content is there now
114        if c_path.exists() {
115            return Some(id);
116        }
117        return None;
118    }
119
120    let tokens = super::tokens::count_tokens(content);
121    let entry = ArchiveEntry {
122        id: id.clone(),
123        tool: tool.to_string(),
124        command: command.to_string(),
125        size_chars: content.len(),
126        size_tokens: tokens,
127        created_at: Utc::now(),
128        session_id: session_id.map(|s| s.to_string()),
129    };
130
131    if let Ok(json) = serde_json::to_string_pretty(&entry) {
132        let meta_tmp = meta_path(&id).with_extension(format!("tmp.{pid}"));
133        if std::fs::write(&meta_tmp, &json).is_ok() {
134            let _ = std::fs::rename(&meta_tmp, meta_path(&id));
135        }
136    }
137
138    Some(id)
139}
140
141pub fn retrieve(id: &str) -> Option<String> {
142    let path = content_path(id);
143    std::fs::read_to_string(path).ok()
144}
145
146pub fn retrieve_with_range(id: &str, start: usize, end: usize) -> Option<String> {
147    let content = retrieve(id)?;
148    let lines: Vec<&str> = content.lines().collect();
149    let start = start.saturating_sub(1).min(lines.len());
150    let end = end.min(lines.len());
151    if start >= end {
152        return Some(String::new());
153    }
154    Some(
155        lines[start..end]
156            .iter()
157            .enumerate()
158            .map(|(i, line)| format!("{:>6}|{line}", start + i + 1))
159            .collect::<Vec<_>>()
160            .join("\n"),
161    )
162}
163
164pub fn retrieve_with_search(id: &str, pattern: &str) -> Option<String> {
165    let content = retrieve(id)?;
166    let pattern_lower = pattern.to_lowercase();
167    let matches: Vec<String> = content
168        .lines()
169        .enumerate()
170        .filter(|(_, line)| line.to_lowercase().contains(&pattern_lower))
171        .map(|(i, line)| format!("{:>6}|{line}", i + 1))
172        .collect();
173
174    if matches.is_empty() {
175        Some(format!("No matches for \"{pattern}\" in archive {id}"))
176    } else {
177        Some(format!(
178            "{} match(es) for \"{}\":\n{}",
179            matches.len(),
180            pattern,
181            matches.join("\n")
182        ))
183    }
184}
185
186pub fn list_entries(session_id: Option<&str>) -> Vec<ArchiveEntry> {
187    let base = archive_base_dir();
188    if !base.exists() {
189        return Vec::new();
190    }
191    let mut entries = Vec::new();
192    if let Ok(dirs) = std::fs::read_dir(&base) {
193        for dir_entry in dirs.flatten() {
194            if !dir_entry.path().is_dir() {
195                continue;
196            }
197            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
198                for file in files.flatten() {
199                    let path = file.path();
200                    if path.extension().and_then(|e| e.to_str()) != Some("json") {
201                        continue;
202                    }
203                    if let Ok(data) = std::fs::read_to_string(&path) {
204                        if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
205                            if let Some(sid) = session_id {
206                                if entry.session_id.as_deref() != Some(sid) {
207                                    continue;
208                                }
209                            }
210                            entries.push(entry);
211                        }
212                    }
213                }
214            }
215        }
216    }
217    entries.sort_by(|a, b| b.created_at.cmp(&a.created_at));
218    entries
219}
220
221pub fn cleanup() -> u32 {
222    let max_hours = max_age_hours();
223    let cutoff = Utc::now() - chrono::Duration::hours(max_hours as i64);
224    let base = archive_base_dir();
225    if !base.exists() {
226        return 0;
227    }
228    let mut removed = 0u32;
229    if let Ok(dirs) = std::fs::read_dir(&base) {
230        for dir_entry in dirs.flatten() {
231            if !dir_entry.path().is_dir() {
232                continue;
233            }
234            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
235                for file in files.flatten() {
236                    let path = file.path();
237                    if path.extension().and_then(|e| e.to_str()) != Some("json") {
238                        continue;
239                    }
240                    if let Ok(data) = std::fs::read_to_string(&path) {
241                        if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
242                            if entry.created_at < cutoff {
243                                let c = content_path(&entry.id);
244                                let _ = std::fs::remove_file(&c);
245                                let _ = std::fs::remove_file(&path);
246                                removed += 1;
247                            }
248                        }
249                    }
250                }
251            }
252        }
253    }
254    removed
255}
256
257pub fn disk_usage_bytes() -> u64 {
258    let base = archive_base_dir();
259    if !base.exists() {
260        return 0;
261    }
262    let mut total = 0u64;
263    if let Ok(dirs) = std::fs::read_dir(&base) {
264        for dir_entry in dirs.flatten() {
265            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
266                for file in files.flatten() {
267                    total += file.metadata().map(|m| m.len()).unwrap_or(0);
268                }
269            }
270        }
271    }
272    total
273}
274
/// Renders the inline placeholder that replaces archived content, telling
/// the reader how large the output was and how to retrieve it.
pub fn format_hint(id: &str, size_chars: usize, size_tokens: usize) -> String {
    format!(
        "[Archived: {} chars ({} tok). Retrieve: ctx_expand(id=\"{}\")]",
        size_chars, size_tokens, id
    )
}
278
#[cfg(test)]
mod tests {
    use super::*;

    // Same input must yield the same id (the store() dedup fast path relies
    // on this within one process); different input must yield a different id.
    #[test]
    fn compute_id_deterministic() {
        let id1 = compute_id("test content");
        let id2 = compute_id("test content");
        assert_eq!(id1, id2);
        let id3 = compute_id("different content");
        assert_ne!(id1, id3);
    }

    // retrieve() maps a missing archive file to None rather than an error.
    #[test]
    fn nonexistent_id_returns_none() {
        assert!(retrieve("nonexistent_archive_id_xyz").is_none());
    }

    // The inline hint must surface both sizes, the retrieval command name,
    // and the id itself.
    #[test]
    fn format_hint_readable() {
        let hint = format_hint("abc123", 5000, 1200);
        assert!(hint.contains("5000 chars"));
        assert!(hint.contains("1200 tok"));
        assert!(hint.contains("ctx_expand"));
        assert!(hint.contains("abc123"));
    }
}
305}