Skip to main content

lean_ctx/core/
archive.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
5use super::data_dir::lean_ctx_data_dir;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct ArchiveEntry {
9    pub id: String,
10    pub tool: String,
11    pub command: String,
12    pub size_chars: usize,
13    pub size_tokens: usize,
14    pub created_at: DateTime<Utc>,
15    pub session_id: Option<String>,
16}
17
18fn archive_base_dir() -> PathBuf {
19    lean_ctx_data_dir()
20        .unwrap_or_else(|_| PathBuf::from(".lean-ctx"))
21        .join("archives")
22}
23
24fn entry_dir(id: &str) -> PathBuf {
25    let prefix = if id.len() >= 2 { &id[..2] } else { id };
26    archive_base_dir().join(prefix)
27}
28
29fn content_path(id: &str) -> PathBuf {
30    entry_dir(id).join(format!("{id}.txt"))
31}
32
33fn meta_path(id: &str) -> PathBuf {
34    entry_dir(id).join(format!("{id}.meta.json"))
35}
36
/// Restricts `path` to owner read/write (mode `0o600`).
///
/// This is best-effort: a failure to change the permissions is ignored.
/// The parameter is a borrowed `Path`, so callers may pass `&PathBuf` or
/// `&Path` alike.
#[cfg(unix)]
fn set_private_file_perms(path: &std::path::Path) {
    use std::os::unix::fs::PermissionsExt;
    let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600));
}
42
/// Derives the archive id of `content`: its 64-bit hash rendered as 16
/// zero-padded lowercase hex digits.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm across Rust releases, so ids computed by differently built
/// binaries may not agree — confirm whether that matters for archives that
/// outlive a toolchain upgrade.
fn compute_id(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::new();
        Hash::hash(content, &mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
51
52pub fn is_enabled() -> bool {
53    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE") {
54        return !matches!(v.as_str(), "0" | "false" | "off");
55    }
56    super::config::Config::load().archive.enabled
57}
58
59fn threshold_chars() -> usize {
60    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_THRESHOLD") {
61        if let Ok(n) = v.parse::<usize>() {
62            return n;
63        }
64    }
65    super::config::Config::load().archive.threshold_chars
66}
67
68fn max_age_hours() -> u64 {
69    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_TTL") {
70        if let Ok(n) = v.parse::<u64>() {
71            return n;
72        }
73    }
74    super::config::Config::load().archive.max_age_hours
75}
76
77pub fn should_archive(content: &str) -> bool {
78    is_enabled() && content.len() >= threshold_chars()
79}
80
81const MAX_ARCHIVE_SIZE: usize = 10 * 1024 * 1024; // 10 MB
82
83pub fn store(tool: &str, command: &str, content: &str, session_id: Option<&str>) -> Option<String> {
84    if !is_enabled() || content.is_empty() {
85        return None;
86    }
87
88    let content = if content.len() > MAX_ARCHIVE_SIZE {
89        &content[..MAX_ARCHIVE_SIZE]
90    } else {
91        content
92    };
93
94    let id = compute_id(content);
95    let c_path = content_path(&id);
96
97    // Fast path: content already archived (idempotent, no race)
98    if c_path.exists() {
99        return Some(id);
100    }
101
102    let dir = entry_dir(&id);
103    if std::fs::create_dir_all(&dir).is_err() {
104        return None;
105    }
106
107    // Atomic write: PID-unique tmp file prevents race between parallel writers.
108    // rename() is atomic on POSIX; on Windows it replaces atomically too.
109    // If two processes race past the exists() check, both write their own tmp
110    // file and both rename to the same target — last writer wins, content is
111    // identical (same hash), so the result is correct either way.
112    let pid = std::process::id();
113    let tmp_path = c_path.with_extension(format!("tmp.{pid}"));
114    if std::fs::write(&tmp_path, content).is_err() {
115        return None;
116    }
117    if std::fs::rename(&tmp_path, &c_path).is_err() {
118        let _ = std::fs::remove_file(&tmp_path);
119        // Another process may have won the race — check if content is there now
120        if c_path.exists() {
121            return Some(id);
122        }
123        return None;
124    }
125    #[cfg(unix)]
126    set_private_file_perms(&c_path);
127
128    let tokens = super::tokens::count_tokens(content);
129    let entry = ArchiveEntry {
130        id: id.clone(),
131        tool: tool.to_string(),
132        command: command.to_string(),
133        size_chars: content.len(),
134        size_tokens: tokens,
135        created_at: Utc::now(),
136        session_id: session_id.map(std::string::ToString::to_string),
137    };
138
139    if let Ok(json) = serde_json::to_string_pretty(&entry) {
140        let meta_tmp = meta_path(&id).with_extension(format!("tmp.{pid}"));
141        if std::fs::write(&meta_tmp, &json).is_ok() {
142            let meta_final = meta_path(&id);
143            let _ = std::fs::rename(&meta_tmp, &meta_final);
144            #[cfg(unix)]
145            set_private_file_perms(&meta_final);
146        }
147    }
148
149    super::archive_fts::index_entry(&id, tool, command, content);
150
151    Some(id)
152}
153
154pub fn retrieve(id: &str) -> Option<String> {
155    let path = content_path(id);
156    std::fs::read_to_string(path).ok()
157}
158
159pub fn retrieve_with_range(id: &str, start: usize, end: usize) -> Option<String> {
160    let content = retrieve(id)?;
161    let lines: Vec<&str> = content.lines().collect();
162    let start = start.saturating_sub(1).min(lines.len());
163    let end = end.min(lines.len());
164    if start >= end {
165        return Some(String::new());
166    }
167    Some(
168        lines[start..end]
169            .iter()
170            .enumerate()
171            .map(|(i, line)| format!("{:>6}|{line}", start + i + 1))
172            .collect::<Vec<_>>()
173            .join("\n"),
174    )
175}
176
177pub fn retrieve_with_search(id: &str, pattern: &str) -> Option<String> {
178    let content = retrieve(id)?;
179    let pattern_lower = pattern.to_lowercase();
180    let matches: Vec<String> = content
181        .lines()
182        .enumerate()
183        .filter(|(_, line)| line.to_lowercase().contains(&pattern_lower))
184        .map(|(i, line)| format!("{:>6}|{line}", i + 1))
185        .collect();
186
187    if matches.is_empty() {
188        Some(format!("No matches for \"{pattern}\" in archive {id}"))
189    } else {
190        Some(format!(
191            "{} match(es) for \"{}\":\n{}",
192            matches.len(),
193            pattern,
194            matches.join("\n")
195        ))
196    }
197}
198
199pub fn list_entries(session_id: Option<&str>) -> Vec<ArchiveEntry> {
200    let base = archive_base_dir();
201    if !base.exists() {
202        return Vec::new();
203    }
204    let mut entries = Vec::new();
205    if let Ok(dirs) = std::fs::read_dir(&base) {
206        for dir_entry in dirs.flatten() {
207            if !dir_entry.path().is_dir() {
208                continue;
209            }
210            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
211                for file in files.flatten() {
212                    let path = file.path();
213                    if path.extension().and_then(|e| e.to_str()) != Some("json") {
214                        continue;
215                    }
216                    if let Ok(data) = std::fs::read_to_string(&path) {
217                        if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
218                            if let Some(sid) = session_id {
219                                if entry.session_id.as_deref() != Some(sid) {
220                                    continue;
221                                }
222                            }
223                            entries.push(entry);
224                        }
225                    }
226                }
227            }
228        }
229    }
230    entries.sort_by_key(|e| std::cmp::Reverse(e.created_at));
231    entries
232}
233
234pub fn cleanup() -> u32 {
235    let max_hours = max_age_hours();
236    let cutoff = Utc::now() - chrono::Duration::hours(max_hours as i64);
237    let base = archive_base_dir();
238    if !base.exists() {
239        return 0;
240    }
241    let mut removed = 0u32;
242    if let Ok(dirs) = std::fs::read_dir(&base) {
243        for dir_entry in dirs.flatten() {
244            if !dir_entry.path().is_dir() {
245                continue;
246            }
247            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
248                for file in files.flatten() {
249                    let path = file.path();
250                    if path.extension().and_then(|e| e.to_str()) != Some("json") {
251                        continue;
252                    }
253                    if let Ok(data) = std::fs::read_to_string(&path) {
254                        if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
255                            if entry.created_at < cutoff {
256                                let c = content_path(&entry.id);
257                                let _ = std::fs::remove_file(&c);
258                                let _ = std::fs::remove_file(&path);
259                                super::archive_fts::remove_entry(&entry.id);
260                                removed += 1;
261                            }
262                        }
263                    }
264                }
265            }
266        }
267    }
268    removed
269}
270
271pub fn disk_usage_bytes() -> u64 {
272    let base = archive_base_dir();
273    if !base.exists() {
274        return 0;
275    }
276    let mut total = 0u64;
277    if let Ok(dirs) = std::fs::read_dir(&base) {
278        for dir_entry in dirs.flatten() {
279            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
280                for file in files.flatten() {
281                    total += file.metadata().map_or(0, |m| m.len());
282                }
283            }
284        }
285    }
286    total
287}
288
/// Builds the one-line placeholder shown in place of archived content: it
/// states the archived size in characters and tokens and the `ctx_expand`
/// call that retrieves the entry `id`.
pub fn format_hint(id: &str, size_chars: usize, size_tokens: usize) -> String {
    format!(
        "[Archived: {} chars ({} tok). Retrieve: ctx_expand(id=\"{}\")]",
        size_chars, size_tokens, id
    )
}
292
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical content always maps to the same id; different content maps
    /// to a different one.
    #[test]
    fn compute_id_deterministic() {
        let first = compute_id("test content");
        assert_eq!(first, compute_id("test content"));
        assert_ne!(first, compute_id("different content"));
    }

    /// Looking up an id that was never archived yields nothing.
    #[test]
    fn nonexistent_id_returns_none() {
        let missing = retrieve("nonexistent_archive_id_xyz");
        assert!(missing.is_none());
    }

    /// The hint mentions both sizes, the retrieval call and the id.
    #[test]
    fn format_hint_readable() {
        let hint = format_hint("abc123", 5000, 1200);
        for fragment in ["5000 chars", "1200 tok", "ctx_expand", "abc123"] {
            assert!(hint.contains(fragment));
        }
    }
}