Skip to main content

bones_core/cache/
writer.rs

1//! File I/O helpers for the binary columnar cache.
2
3use std::fs;
4use std::path::Path;
5use std::time::{SystemTime, UNIX_EPOCH};
6
7use anyhow::{Context, Result};
8
9use crate::cache::{decode_events, encode_events, fingerprint_dir};
10use crate::event::Event;
11use crate::event::parser::parse_lines;
12use crate::shard::ShardManager;
13
/// Summary of a completed cache write.
///
/// Produced by the [`CacheWriter`] write methods and by [`rebuild_cache`].
#[derive(Debug, Clone, PartialEq)]
pub struct CacheStats {
    /// Number of events that were encoded into the cache file.
    pub total_events: usize,
    /// Size, in bytes, of the encoded cache that was written.
    pub file_size_bytes: u64,
    /// Encoded size divided by the estimated source size (approximate).
    ///
    /// The writer reports `1.0` when the estimated source size is zero.
    pub compression_ratio: f64,
}
24
25/// Writes events to on-disk binary cache format.
26#[derive(Debug, Default)]
27pub struct CacheWriter {
28    events: Vec<Event>,
29}
30
31impl CacheWriter {
32    /// Create an empty cache writer.
33    #[must_use]
34    pub fn new() -> Self {
35        Self::default()
36    }
37
38    /// Add one event to the writer buffer.
39    pub fn push_event(&mut self, event: &Event) {
40        self.events.push(event.clone());
41    }
42
43    /// Encode all buffered events and write them to `path`.
44    ///
45    /// Parent directories are created automatically.
46    ///
47    /// # Errors
48    ///
49    /// Returns an error if encoding or file I/O fails.
50    pub fn write_to_file(&self, path: &Path) -> Result<CacheStats> {
51        self.write_to_file_with_created_at_us(path, now_us())
52    }
53
54    /// Encode all buffered events with an explicit header timestamp/fingerprint.
55    ///
56    /// Cache lifecycle callers use this to store their freshness fingerprint in
57    /// the legacy `created_at_us` header field.
58    ///
59    /// # Errors
60    ///
61    /// Returns an error if encoding or file I/O fails.
62    pub fn write_to_file_with_created_at_us(
63        &self,
64        path: &Path,
65        created_at_us: u64,
66    ) -> Result<CacheStats> {
67        if let Some(parent) = path.parent() {
68            fs::create_dir_all(parent)
69                .with_context(|| format!("create cache dir {}", parent.display()))?;
70        }
71
72        let bytes = encode_events(&self.events, created_at_us)
73            .map_err(|e| anyhow::anyhow!("encode cache events: {e}"))?;
74
75        fs::write(path, &bytes).with_context(|| format!("write cache file {}", path.display()))?;
76
77        let source_bytes = estimated_source_bytes(&self.events);
78        let compression_ratio = if source_bytes == 0 {
79            1.0
80        } else {
81            {
82                // Saturate to u32 for lossless f64 conversion; cache files
83                // are always well under 4 GiB so no information is lost.
84                let num = u32::try_from(bytes.len()).unwrap_or(u32::MAX);
85                let den = u32::try_from(source_bytes).unwrap_or(u32::MAX);
86                f64::from(num) / f64::from(den)
87            }
88        };
89
90        Ok(CacheStats {
91            total_events: self.events.len(),
92            file_size_bytes: bytes.len() as u64,
93            compression_ratio,
94        })
95    }
96
97    /// Append `new_events` to an existing cache file, rewriting the whole file.
98    ///
99    /// If the cache file does not exist, this behaves like a normal write with
100    /// just `new_events`.
101    ///
102    /// # Errors
103    ///
104    /// Returns an error if decoding existing data, encoding, or file I/O fails.
105    pub fn append_incremental(existing: &Path, new_events: &[Event]) -> Result<CacheStats> {
106        let mut all_events = if existing.exists() {
107            let data = fs::read(existing)
108                .with_context(|| format!("read existing cache {}", existing.display()))?;
109            let (_header, events) =
110                decode_events(&data).map_err(|e| anyhow::anyhow!("decode existing cache: {e}"))?;
111            events
112        } else {
113            Vec::new()
114        };
115
116        all_events.extend_from_slice(new_events);
117
118        let writer = Self { events: all_events };
119        writer.write_to_file(existing)
120    }
121}
122
123/// Rebuild cache from `.bones/events` shards and write to `cache_path`.
124///
125/// # Errors
126///
127/// Returns an error if shard replay, parsing, encoding, or file I/O fails.
128pub fn rebuild_cache(events_dir: &Path, cache_path: &Path) -> Result<CacheStats> {
129    let bones_dir = events_dir.parent().unwrap_or_else(|| Path::new("."));
130    let shard_mgr = ShardManager::new(bones_dir);
131
132    let content = shard_mgr
133        .replay()
134        .map_err(|e| anyhow::anyhow!("replay shards: {e}"))?;
135
136    let events = parse_lines(&content)
137        .map_err(|(line, e)| anyhow::anyhow!("parse error at line {line}: {e}"))?;
138
139    let mut writer = CacheWriter::new();
140    for event in &events {
141        writer.push_event(event);
142    }
143
144    let fingerprint = fingerprint_dir(events_dir).context("compute cache freshness fingerprint")?;
145    writer.write_to_file_with_created_at_us(cache_path, fingerprint)
146}
147
148fn estimated_source_bytes(events: &[Event]) -> usize {
149    events
150        .iter()
151        .map(|event| serde_json::to_vec(event).map_or(0, |v| v.len() + 1))
152        .sum()
153}
154
/// Current wall-clock time as microseconds since the Unix epoch.
///
/// Returns `0` if the system clock reads earlier than the epoch, and
/// saturates at `u64::MAX` if the microsecond count does not fit in a `u64`.
fn now_us() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => u64::try_from(elapsed.as_micros()).unwrap_or(u64::MAX),
        Err(_) => 0,
    }
}
160
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use tempfile::TempDir;

    use super::*;
    use crate::event::data::{CreateData, EventData, MoveData};
    use crate::event::types::EventType;
    use crate::event::{self, Event};
    use crate::model::item::{Kind, State, Urgency};
    use crate::model::item_id::ItemId;

    /// Build a test event for `item_id` at timestamp `ts`.
    ///
    /// `EventType::Create` gets a `CreateData` payload; every other event type
    /// gets a `MoveData` payload moving the item to `State::Doing`.
    fn make_event(item_id: &str, ts: i64, kind: EventType) -> Event {
        let data = match kind {
            EventType::Create => EventData::Create(CreateData {
                title: format!("Item {item_id}"),
                kind: Kind::Task,
                size: None,
                urgency: Urgency::Default,
                labels: Vec::new(),
                parent: None,
                causation: None,
                description: None,
                extra: BTreeMap::new(),
            }),
            _ => EventData::Move(MoveData {
                state: State::Doing,
                reason: None,
                extra: BTreeMap::new(),
            }),
        };

        let mut event = Event {
            wall_ts_us: ts,
            agent: "test-agent".to_string(),
            itc: "itc:AQ".to_string(),
            parents: Vec::new(),
            event_type: kind,
            item_id: ItemId::new_unchecked(item_id),
            data,
            event_hash: String::new(),
        };
        // The result is deliberately ignored here; the call presumably fills
        // in `event_hash` as a side effect — TODO confirm.
        let _ = event::writer::write_event(&mut event);
        event
    }

    /// Writing buffered events and decoding the file yields the same count.
    #[test]
    fn write_to_file_round_trips_events() {
        let tmp = TempDir::new().expect("tempdir");
        let cache_path = tmp.path().join(".bones/cache/events.bin");

        let mut writer = CacheWriter::new();
        writer.push_event(&make_event("bn-a1", 1000, EventType::Create));
        writer.push_event(&make_event("bn-a1", 2000, EventType::Move));

        let stats = writer.write_to_file(&cache_path).expect("write cache");
        assert_eq!(stats.total_events, 2);
        assert!(stats.file_size_bytes > 0);

        let data = fs::read(cache_path).expect("read cache file");
        let (_header, decoded) = decode_events(&data).expect("decode cache file");
        assert_eq!(decoded.len(), 2);
    }

    /// Appending to a seeded cache keeps the old events and adds the new ones.
    #[test]
    fn append_incremental_appends_new_events() {
        let tmp = TempDir::new().expect("tempdir");
        let cache_path = tmp.path().join(".bones/cache/events.bin");

        let mut writer = CacheWriter::new();
        writer.push_event(&make_event("bn-a1", 1000, EventType::Create));
        writer.write_to_file(&cache_path).expect("seed cache");

        let new_events = vec![make_event("bn-a2", 2000, EventType::Create)];
        let stats = CacheWriter::append_incremental(&cache_path, &new_events)
            .expect("append cache incrementally");

        assert_eq!(stats.total_events, 2);

        let data = fs::read(cache_path).expect("read cache file");
        let (_header, decoded) = decode_events(&data).expect("decode cache file");
        assert_eq!(decoded.len(), 2);
    }

    /// Rebuilding from event shards produces a decodable cache with every
    /// shard event in it.
    #[test]
    fn rebuild_cache_reads_events_shards() {
        let tmp = TempDir::new().expect("tempdir");
        let bones_dir = tmp.path().join(".bones");
        let shard_mgr = ShardManager::new(&bones_dir);
        shard_mgr.ensure_dirs().expect("ensure dirs");
        shard_mgr.init().expect("init");

        let event = make_event("bn-z9", 42, EventType::Create);
        let line = event::writer::write_line(&event).expect("line");
        let (year, month) = shard_mgr
            .active_shard()
            .expect("active shard")
            .expect("active shard value");
        shard_mgr
            .append_raw(year, month, &line)
            .expect("append event");

        let events_dir = bones_dir.join("events");
        let cache_path = bones_dir.join("cache/events.bin");
        let stats = rebuild_cache(&events_dir, &cache_path).expect("rebuild cache");

        assert_eq!(stats.total_events, 1);
        assert!(cache_path.exists());

        // The rebuilt cache must actually decode back to the shard's event.
        let data = fs::read(&cache_path).expect("read cache file");
        let (_header, decoded) = decode_events(&data).expect("decode cache file");
        assert_eq!(decoded.len(), 1);
    }
}