Skip to main content

lean_ctx/core/
archive.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
5use super::data_dir::lean_ctx_data_dir;
6
/// Metadata describing one archived piece of tool output.
///
/// `store` serializes this record as pretty-printed JSON into the entry's
/// `.meta.json` file, and `list_entries` / `cleanup` read it back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveEntry {
    /// Archive identifier; `store` derives it from the content via `compute_id`.
    pub id: String,
    /// Tool name as passed to `store`.
    pub tool: String,
    /// Command string as passed to `store`.
    pub command: String,
    /// Size of the stored content. `store` fills this with `content.len()`,
    /// i.e. the UTF-8 byte length rather than a count of `char`s.
    pub size_chars: usize,
    /// Token count of the stored content as reported by `tokens::count_tokens`.
    pub size_tokens: usize,
    /// Time (UTC) at which `store` created the entry; `cleanup` compares it
    /// against the age cutoff.
    pub created_at: DateTime<Utc>,
    /// Optional session identifier; `list_entries` can filter on it.
    pub session_id: Option<String>,
}
17
18fn archive_base_dir() -> PathBuf {
19    lean_ctx_data_dir()
20        .unwrap_or_else(|_| PathBuf::from(".lean-ctx"))
21        .join("archives")
22}
23
24fn entry_dir(id: &str) -> PathBuf {
25    let prefix = if id.len() >= 2 { &id[..2] } else { id };
26    archive_base_dir().join(prefix)
27}
28
29fn content_path(id: &str) -> PathBuf {
30    entry_dir(id).join(format!("{id}.txt"))
31}
32
33fn meta_path(id: &str) -> PathBuf {
34    entry_dir(id).join(format!("{id}.meta.json"))
35}
36
/// Restricts `path` to owner read/write (mode `0o600`).
///
/// Best effort: a failure to change the permissions is deliberately ignored.
/// Accepts any `&Path`; existing callers that pass `&PathBuf` keep working
/// through deref coercion.
#[cfg(unix)]
fn set_private_file_perms(path: &std::path::Path) {
    use std::os::unix::fs::PermissionsExt;
    let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600));
}
42
/// Derives the archive id for `content`: the 64-bit `DefaultHasher` digest of
/// the text, rendered as 16 zero-padded lowercase hex digits.
///
/// NOTE(review): the standard library does not guarantee that `DefaultHasher`
/// produces the same values across Rust releases, so ids computed by binaries
/// built with different toolchains may differ for the same content.
fn compute_id(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    format!("{:016x}", state.finish())
}
51
52pub fn is_enabled() -> bool {
53    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE") {
54        return !matches!(v.as_str(), "0" | "false" | "off");
55    }
56    super::config::Config::load().archive.enabled
57}
58
59fn threshold_chars() -> usize {
60    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_THRESHOLD") {
61        if let Ok(n) = v.parse::<usize>() {
62            return n;
63        }
64    }
65    super::config::Config::load().archive.threshold_chars
66}
67
68fn max_age_hours() -> u64 {
69    if let Ok(v) = std::env::var("LEAN_CTX_ARCHIVE_TTL") {
70        if let Ok(n) = v.parse::<u64>() {
71            return n;
72        }
73    }
74    super::config::Config::load().archive.max_age_hours
75}
76
77pub fn should_archive(content: &str) -> bool {
78    is_enabled() && content.len() >= threshold_chars()
79}
80
81const MAX_ARCHIVE_SIZE: usize = 10 * 1024 * 1024; // 10 MB
82
83pub fn store(tool: &str, command: &str, content: &str, session_id: Option<&str>) -> Option<String> {
84    if !is_enabled() || content.is_empty() {
85        return None;
86    }
87
88    let content = if content.len() > MAX_ARCHIVE_SIZE {
89        &content[..MAX_ARCHIVE_SIZE]
90    } else {
91        content
92    };
93
94    let id = compute_id(content);
95    let c_path = content_path(&id);
96
97    // Fast path: content already archived (idempotent, no race)
98    if c_path.exists() {
99        return Some(id);
100    }
101
102    let dir = entry_dir(&id);
103    if std::fs::create_dir_all(&dir).is_err() {
104        return None;
105    }
106
107    // Atomic write: PID-unique tmp file prevents race between parallel writers.
108    // rename() is atomic on POSIX; on Windows it replaces atomically too.
109    // If two processes race past the exists() check, both write their own tmp
110    // file and both rename to the same target — last writer wins, content is
111    // identical (same hash), so the result is correct either way.
112    let pid = std::process::id();
113    let tmp_path = c_path.with_extension(format!("tmp.{pid}"));
114    if std::fs::write(&tmp_path, content).is_err() {
115        return None;
116    }
117    if std::fs::rename(&tmp_path, &c_path).is_err() {
118        let _ = std::fs::remove_file(&tmp_path);
119        // Another process may have won the race — check if content is there now
120        if c_path.exists() {
121            return Some(id);
122        }
123        return None;
124    }
125    #[cfg(unix)]
126    set_private_file_perms(&c_path);
127
128    let tokens = super::tokens::count_tokens(content);
129    let entry = ArchiveEntry {
130        id: id.clone(),
131        tool: tool.to_string(),
132        command: command.to_string(),
133        size_chars: content.len(),
134        size_tokens: tokens,
135        created_at: Utc::now(),
136        session_id: session_id.map(std::string::ToString::to_string),
137    };
138
139    if let Ok(json) = serde_json::to_string_pretty(&entry) {
140        let meta_tmp = meta_path(&id).with_extension(format!("tmp.{pid}"));
141        if std::fs::write(&meta_tmp, &json).is_ok() {
142            let meta_final = meta_path(&id);
143            let _ = std::fs::rename(&meta_tmp, &meta_final);
144            #[cfg(unix)]
145            set_private_file_perms(&meta_final);
146        }
147    }
148
149    Some(id)
150}
151
152pub fn retrieve(id: &str) -> Option<String> {
153    let path = content_path(id);
154    std::fs::read_to_string(path).ok()
155}
156
157pub fn retrieve_with_range(id: &str, start: usize, end: usize) -> Option<String> {
158    let content = retrieve(id)?;
159    let lines: Vec<&str> = content.lines().collect();
160    let start = start.saturating_sub(1).min(lines.len());
161    let end = end.min(lines.len());
162    if start >= end {
163        return Some(String::new());
164    }
165    Some(
166        lines[start..end]
167            .iter()
168            .enumerate()
169            .map(|(i, line)| format!("{:>6}|{line}", start + i + 1))
170            .collect::<Vec<_>>()
171            .join("\n"),
172    )
173}
174
175pub fn retrieve_with_search(id: &str, pattern: &str) -> Option<String> {
176    let content = retrieve(id)?;
177    let pattern_lower = pattern.to_lowercase();
178    let matches: Vec<String> = content
179        .lines()
180        .enumerate()
181        .filter(|(_, line)| line.to_lowercase().contains(&pattern_lower))
182        .map(|(i, line)| format!("{:>6}|{line}", i + 1))
183        .collect();
184
185    if matches.is_empty() {
186        Some(format!("No matches for \"{pattern}\" in archive {id}"))
187    } else {
188        Some(format!(
189            "{} match(es) for \"{}\":\n{}",
190            matches.len(),
191            pattern,
192            matches.join("\n")
193        ))
194    }
195}
196
197pub fn list_entries(session_id: Option<&str>) -> Vec<ArchiveEntry> {
198    let base = archive_base_dir();
199    if !base.exists() {
200        return Vec::new();
201    }
202    let mut entries = Vec::new();
203    if let Ok(dirs) = std::fs::read_dir(&base) {
204        for dir_entry in dirs.flatten() {
205            if !dir_entry.path().is_dir() {
206                continue;
207            }
208            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
209                for file in files.flatten() {
210                    let path = file.path();
211                    if path.extension().and_then(|e| e.to_str()) != Some("json") {
212                        continue;
213                    }
214                    if let Ok(data) = std::fs::read_to_string(&path) {
215                        if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
216                            if let Some(sid) = session_id {
217                                if entry.session_id.as_deref() != Some(sid) {
218                                    continue;
219                                }
220                            }
221                            entries.push(entry);
222                        }
223                    }
224                }
225            }
226        }
227    }
228    entries.sort_by_key(|e| std::cmp::Reverse(e.created_at));
229    entries
230}
231
232pub fn cleanup() -> u32 {
233    let max_hours = max_age_hours();
234    let cutoff = Utc::now() - chrono::Duration::hours(max_hours as i64);
235    let base = archive_base_dir();
236    if !base.exists() {
237        return 0;
238    }
239    let mut removed = 0u32;
240    if let Ok(dirs) = std::fs::read_dir(&base) {
241        for dir_entry in dirs.flatten() {
242            if !dir_entry.path().is_dir() {
243                continue;
244            }
245            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
246                for file in files.flatten() {
247                    let path = file.path();
248                    if path.extension().and_then(|e| e.to_str()) != Some("json") {
249                        continue;
250                    }
251                    if let Ok(data) = std::fs::read_to_string(&path) {
252                        if let Ok(entry) = serde_json::from_str::<ArchiveEntry>(&data) {
253                            if entry.created_at < cutoff {
254                                let c = content_path(&entry.id);
255                                let _ = std::fs::remove_file(&c);
256                                let _ = std::fs::remove_file(&path);
257                                removed += 1;
258                            }
259                        }
260                    }
261                }
262            }
263        }
264    }
265    removed
266}
267
268pub fn disk_usage_bytes() -> u64 {
269    let base = archive_base_dir();
270    if !base.exists() {
271        return 0;
272    }
273    let mut total = 0u64;
274    if let Ok(dirs) = std::fs::read_dir(&base) {
275        for dir_entry in dirs.flatten() {
276            if let Ok(files) = std::fs::read_dir(dir_entry.path()) {
277                for file in files.flatten() {
278                    total += file.metadata().map_or(0, |m| m.len());
279                }
280            }
281        }
282    }
283    total
284}
285
/// Builds the one-line placeholder shown in place of archived output.
///
/// The hint states the archived size in characters and tokens and names the
/// `ctx_expand` call, including `id`, that retrieves the content.
pub fn format_hint(id: &str, size_chars: usize, size_tokens: usize) -> String {
    format!(
        "[Archived: {size_chars} chars ({size_tokens} tok). Retrieve: ctx_expand(id=\"{id}\")]"
    )
}
289
#[cfg(test)]
mod tests {
    use super::{compute_id, format_hint, retrieve};

    /// Identical content hashes to the same id; different content does not.
    #[test]
    fn compute_id_deterministic() {
        let first = compute_id("test content");
        let repeat = compute_id("test content");
        let other = compute_id("different content");
        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }

    /// Looking up an id that was never stored yields nothing.
    #[test]
    fn nonexistent_id_returns_none() {
        assert_eq!(retrieve("nonexistent_archive_id_xyz"), None);
    }

    /// The hint mentions both sizes, the retrieval call and the id.
    #[test]
    fn format_hint_readable() {
        let hint = format_hint("abc123", 5000, 1200);
        for needle in ["5000 chars", "1200 tok", "ctx_expand", "abc123"] {
            assert!(hint.contains(needle));
        }
    }
}