Skip to main content

bones_core/cache/
columns.rs

1//! Per-column type definitions for the binary event cache.
2//!
3//! A `CacheColumns` struct holds one column of each type for a batch of
4//! events. It is used as the intermediate representation between the
5//! [`super::CacheHeader`] metadata and the encoded column bytes.
6//!
7//! See `docs/binary-cache-format.md` for the byte-level layout.
8
9use crate::event::{Event, EventType};
10use crate::model::item_id::ItemId;
11
// Column index constants for this format version. The indices are dense and
// start at zero.

/// Index of the timestamps column.
pub const COL_TIMESTAMPS: usize = 0;
/// Index of the agents column.
pub const COL_AGENTS: usize = 1;
/// Index of the event-types column.
pub const COL_EVENT_TYPES: usize = 2;
/// Index of the item-IDs column.
pub const COL_ITEM_IDS: usize = 3;
/// Index of the parents column.
pub const COL_PARENTS: usize = 4;
/// Index of the ITC column.
pub const COL_ITC: usize = 5;
/// Index of the values column.
pub const COL_VALUES: usize = 6;

/// Total number of columns in the cache format.
///
/// One past the highest column index, because the indices above are dense
/// and zero-based.
pub const COLUMN_COUNT: usize = COL_VALUES + 1;
23
/// All event columns for a batch of events, decomposed by type.
///
/// Each field holds one column of data (in event order). The `i`-th element
/// of each column corresponds to the `i`-th event in the batch, so every
/// column is expected to have the same length.
///
/// The fields are public, so nothing in the type itself enforces the
/// equal-length invariant; [`CacheColumns::into_events`] checks it and
/// reports a mismatch as an error.
///
/// # Construction
///
/// Build with [`CacheColumns::from_events`]. Consume with
/// [`CacheColumns::to_column_slices`] for encoding.
// NOTE(review): `to_column_slices` is not defined in this file — confirm it
// exists elsewhere in the crate, otherwise the intra-doc link above is broken.
#[derive(Debug, Clone, Default)]
pub struct CacheColumns {
    /// Wall-clock timestamps in microseconds since Unix epoch.
    pub timestamps: Vec<i64>,

    /// Agent identifier strings.
    pub agents: Vec<String>,

    /// Event type discriminants.
    pub event_types: Vec<EventType>,

    /// Item ID strings.
    pub item_ids: Vec<String>,

    /// Parent hash lists — each element is a comma-joined string of parent
    /// hashes (empty string for root events, i.e. events with no parents).
    pub parents: Vec<String>,

    /// ITC stamp strings.
    pub itc: Vec<String>,

    /// JSON-serialised event payload strings (one JSON document per event).
    pub values: Vec<String>,
}
57
58impl CacheColumns {
59    /// Decompose a slice of events into parallel columns.
60    ///
61    /// # Errors
62    ///
63    /// Returns an error if any event's data fails to serialise to JSON.
64    pub fn from_events(events: &[Event]) -> Result<Self, serde_json::Error> {
65        let n = events.len();
66        let mut cols = Self {
67            timestamps: Vec::with_capacity(n),
68            agents: Vec::with_capacity(n),
69            event_types: Vec::with_capacity(n),
70            item_ids: Vec::with_capacity(n),
71            parents: Vec::with_capacity(n),
72            itc: Vec::with_capacity(n),
73            values: Vec::with_capacity(n),
74        };
75
76        for event in events {
77            cols.timestamps.push(event.wall_ts_us);
78            cols.agents.push(event.agent.clone());
79            cols.event_types.push(event.event_type);
80            cols.item_ids.push(event.item_id.as_str().to_string());
81            cols.parents.push(event.parents.join(","));
82            cols.itc.push(event.itc.clone());
83            cols.values.push(serde_json::to_string(&event.data)?);
84        }
85
86        Ok(cols)
87    }
88
89    /// Reconstruct events from parallel column data.
90    ///
91    /// All columns must have the same length. Parent hashes are split on
92    /// commas; the empty string yields an empty parent list (root event).
93    ///
94    /// # Errors
95    ///
96    /// Returns an error string if:
97    /// - Column lengths differ.
98    /// - An item ID string is not a valid `ItemId`.
99    /// - A value JSON string cannot be parsed as the event's data payload.
100    pub fn into_events(self) -> Result<Vec<Event>, String> {
101        let n = self.timestamps.len();
102        let check_len = |name: &str, len: usize| {
103            if len == n {
104                Ok(())
105            } else {
106                Err(format!(
107                    "column '{name}' length {len} != timestamps length {n}"
108                ))
109            }
110        };
111        check_len("agents", self.agents.len())?;
112        check_len("event_types", self.event_types.len())?;
113        check_len("item_ids", self.item_ids.len())?;
114        check_len("parents", self.parents.len())?;
115        check_len("itc", self.itc.len())?;
116        check_len("values", self.values.len())?;
117
118        let mut events = Vec::with_capacity(n);
119
120        for i in 0..n {
121            let event_type = self.event_types[i];
122            let item_id = ItemId::parse(&self.item_ids[i])
123                .map_err(|e| format!("row {i} invalid item_id: {e}"))?;
124
125            let parents: Vec<String> = if self.parents[i].is_empty() {
126                vec![]
127            } else {
128                self.parents[i].split(',').map(str::to_string).collect()
129            };
130
131            let data = crate::event::EventData::deserialize_for(event_type, &self.values[i])
132                .map_err(|e| format!("row {i} data parse error: {e}"))?;
133
134            events.push(Event {
135                wall_ts_us: self.timestamps[i],
136                agent: self.agents[i].clone(),
137                itc: self.itc[i].clone(),
138                parents,
139                event_type,
140                item_id,
141                data,
142                // event_hash is not stored in the cache columns (derived from
143                // content). Callers that need the hash must recompute it from
144                // the TSJSON writer or preserve it separately.
145                event_hash: String::new(),
146            });
147        }
148
149        Ok(events)
150    }
151
152    /// Return the number of events (rows) in this column set.
153    #[must_use]
154    pub const fn len(&self) -> usize {
155        self.timestamps.len()
156    }
157
158    /// Return `true` if there are no events in this column set.
159    #[must_use]
160    pub const fn is_empty(&self) -> bool {
161        self.timestamps.is_empty()
162    }
163}
164
/// A single row extracted from the column arrays.
///
/// Useful for inspecting individual events without rebuilding a full [`Event`].
/// Every field is an owned copy of the corresponding column element; the
/// item ID, parents and payload are left in their stored string form and are
/// not parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnRow {
    /// Wall-clock timestamp.
    pub wall_ts_us: i64,
    /// Agent identifier.
    pub agent: String,
    /// Event type discriminant.
    pub event_type: EventType,
    /// Item ID string (unparsed).
    pub item_id: String,
    /// Comma-joined parent hashes (empty for root events).
    pub parents: String,
    /// ITC stamp.
    pub itc: String,
    /// JSON payload string (unparsed).
    pub value: String,
}
185
186impl CacheColumns {
187    /// Extract a single row by index.
188    ///
189    /// Returns `None` if `index >= self.len()`.
190    #[must_use]
191    pub fn row(&self, index: usize) -> Option<ColumnRow> {
192        if index >= self.len() {
193            return None;
194        }
195        Some(ColumnRow {
196            wall_ts_us: self.timestamps[index],
197            agent: self.agents[index].clone(),
198            event_type: self.event_types[index],
199            item_id: self.item_ids[index].clone(),
200            parents: self.parents[index].clone(),
201            itc: self.itc[index].clone(),
202            value: self.values[index].clone(),
203        })
204    }
205
206    /// Return only the event types column (useful for count-by-type queries).
207    #[must_use]
208    pub fn event_types(&self) -> &[EventType] {
209        &self.event_types
210    }
211
212    /// Return only the timestamps column (useful for range queries).
213    #[must_use]
214    pub fn timestamps(&self) -> &[i64] {
215        &self.timestamps
216    }
217}
218
219// ---------------------------------------------------------------------------
220// Tests
221// ---------------------------------------------------------------------------
222
#[cfg(test)]
mod tests {
    use super::*;
    use crate::event::data::{CommentData, CreateData, MoveData};
    use crate::event::{Event, EventData, EventType};
    use crate::model::item::{Kind, State, Urgency};
    use crate::model::item_id::ItemId;
    use std::collections::BTreeMap;

    /// Build a root `Create` event (no parents) with a timestamp-derived hash.
    fn make_create_event(ts: i64, agent: &str, item: &str, title: &str) -> Event {
        Event {
            wall_ts_us: ts,
            agent: agent.to_string(),
            itc: "itc:AQ".to_string(),
            parents: vec![],
            event_type: EventType::Create,
            item_id: ItemId::new_unchecked(item),
            data: EventData::Create(CreateData {
                title: title.to_string(),
                kind: Kind::Task,
                size: None,
                urgency: Urgency::Default,
                labels: vec![],
                parent: None,
                causation: None,
                description: None,
                extra: BTreeMap::new(),
            }),
            event_hash: format!("blake3:{ts:016x}"),
        }
    }

    /// Build a `Move` event with exactly one parent hash.
    fn make_move_event(ts: i64, agent: &str, item: &str, parent_hash: &str) -> Event {
        Event {
            wall_ts_us: ts,
            agent: agent.to_string(),
            itc: "itc:AQ.1".to_string(),
            parents: vec![parent_hash.to_string()],
            event_type: EventType::Move,
            item_id: ItemId::new_unchecked(item),
            data: EventData::Move(MoveData {
                state: State::Doing,
                reason: None,
                extra: BTreeMap::new(),
            }),
            event_hash: format!("blake3:move{ts:012x}"),
        }
    }

    /// Build a root `Comment` event (no parents).
    fn make_comment_event(ts: i64, agent: &str, item: &str, body: &str) -> Event {
        Event {
            wall_ts_us: ts,
            agent: agent.to_string(),
            itc: "itc:Bg".to_string(),
            parents: vec![],
            event_type: EventType::Comment,
            item_id: ItemId::new_unchecked(item),
            data: EventData::Comment(CommentData {
                body: body.to_string(),
                extra: BTreeMap::new(),
            }),
            event_hash: format!("blake3:cmt{ts:013x}"),
        }
    }

    // === Column count constants ==========================================

    #[test]
    fn column_count_is_seven() {
        assert_eq!(COLUMN_COUNT, 7);
    }

    #[test]
    fn column_indices_are_distinct() {
        let indices = [
            COL_TIMESTAMPS,
            COL_AGENTS,
            COL_EVENT_TYPES,
            COL_ITEM_IDS,
            COL_PARENTS,
            COL_ITC,
            COL_VALUES,
        ];
        let set: std::collections::HashSet<_> = indices.iter().copied().collect();
        assert_eq!(set.len(), COLUMN_COUNT, "column indices must be distinct");
    }

    // === CacheColumns::from_events ========================================

    #[test]
    fn from_events_empty() {
        let cols = CacheColumns::from_events(&[]).unwrap();
        assert!(cols.is_empty());
        assert_eq!(cols.len(), 0);
    }

    #[test]
    fn from_events_single_create() {
        let event = make_create_event(1_700_000_000_000, "agent-a", "bn-a7x", "Do a thing");
        let cols = CacheColumns::from_events(std::slice::from_ref(&event)).unwrap();
        assert_eq!(cols.len(), 1);
        assert_eq!(cols.timestamps[0], 1_700_000_000_000);
        assert_eq!(cols.agents[0], "agent-a");
        assert_eq!(cols.event_types[0], EventType::Create);
        assert_eq!(cols.item_ids[0], "bn-a7x");
        assert_eq!(cols.parents[0], "");
        assert_eq!(cols.itc[0], "itc:AQ");
        assert!(cols.values[0].contains("Do a thing"));
    }

    #[test]
    fn from_events_parents_joined_with_comma() {
        let mut event = make_create_event(1_000, "a", "bn-a7x", "T");
        event.parents = vec!["blake3:aaa".to_string(), "blake3:bbb".to_string()];
        let cols = CacheColumns::from_events(std::slice::from_ref(&event)).unwrap();
        assert_eq!(cols.parents[0], "blake3:aaa,blake3:bbb");
    }

    #[test]
    fn from_events_multiple() {
        let events = vec![
            make_create_event(1_000, "alice", "bn-a7x", "Task A"),
            make_move_event(2_000, "bob", "bn-a7x", "blake3:abc"),
            make_comment_event(3_000, "alice", "bn-a7x", "Look at this"),
        ];
        let cols = CacheColumns::from_events(&events).unwrap();
        assert_eq!(cols.len(), 3);
        assert_eq!(cols.timestamps, vec![1_000, 2_000, 3_000]);
        assert_eq!(cols.agents, vec!["alice", "bob", "alice"]);
        assert_eq!(
            cols.event_types,
            vec![EventType::Create, EventType::Move, EventType::Comment]
        );
        assert_eq!(cols.item_ids, vec!["bn-a7x", "bn-a7x", "bn-a7x"]);
        assert_eq!(cols.parents[0], "");
        assert_eq!(cols.parents[1], "blake3:abc");
        assert_eq!(cols.parents[2], "");
    }

    // === CacheColumns::into_events ========================================

    #[test]
    fn into_events_empty() {
        let cols = CacheColumns::default();
        let events = cols.into_events().unwrap();
        assert!(events.is_empty());
    }

    #[test]
    fn into_events_roundtrip_single() {
        let event = make_create_event(1_700_000_000_000, "agent-a", "bn-a7x", "Do a thing");
        let cols = CacheColumns::from_events(std::slice::from_ref(&event)).unwrap();
        let mut reconstructed = cols.into_events().unwrap();
        assert_eq!(reconstructed.len(), 1);
        let rec = &mut reconstructed[0];
        // event_hash is not stored in cache columns, so it comes back empty.
        // Copy the original hash over so the remaining fields can be compared
        // with a single equality check.
        assert!(rec.event_hash.is_empty());
        rec.event_hash.clone_from(&event.event_hash);
        assert_eq!(*rec, event);
    }

    #[test]
    fn into_events_roundtrip_multiple() {
        let events = vec![
            make_create_event(1_000, "alice", "bn-a7x", "Task A"),
            make_move_event(2_000, "bob", "bn-a7x", "blake3:abc"),
            make_comment_event(3_000, "alice", "bn-a7x", "Look at this"),
        ];
        let cols = CacheColumns::from_events(&events).unwrap();
        let mut reconstructed = cols.into_events().unwrap();
        assert_eq!(reconstructed.len(), events.len());
        for (i, (rec, orig)) in reconstructed.iter_mut().zip(events.iter()).enumerate() {
            // Same as the single-event round trip: the hash is not cached.
            assert!(rec.event_hash.is_empty(), "row {i} hash should be empty");
            rec.event_hash.clone_from(&orig.event_hash);
            assert_eq!(rec, orig, "mismatch at row {i}");
        }
    }

    #[test]
    fn into_events_empty_parents_becomes_vec() {
        let event = make_create_event(1_000, "alice", "bn-a7x", "Task");
        let cols = CacheColumns::from_events(std::slice::from_ref(&event)).unwrap();
        let reconstructed = cols.into_events().unwrap();
        assert!(reconstructed[0].parents.is_empty());
    }

    #[test]
    fn into_events_multi_parent() {
        let mut event = make_create_event(1_000, "alice", "bn-a7x", "Task");
        event.parents = vec!["blake3:aaa".to_string(), "blake3:bbb".to_string()];
        let cols = CacheColumns::from_events(std::slice::from_ref(&event)).unwrap();
        let reconstructed = cols.into_events().unwrap();
        assert_eq!(
            reconstructed[0].parents,
            vec!["blake3:aaa".to_string(), "blake3:bbb".to_string()]
        );
    }

    #[test]
    fn into_events_column_length_mismatch_is_error() {
        let cols = CacheColumns {
            timestamps: vec![1, 2],
            agents: vec!["a".to_string()], // wrong length: one row short
            event_types: vec![EventType::Create, EventType::Create],
            item_ids: vec!["bn-a7x".to_string(), "bn-a7x".to_string()],
            parents: vec![String::new(), String::new()],
            itc: vec!["itc:AQ".to_string(), "itc:AQ".to_string()],
            values: vec![
                r#"{"title":"T","kind":"task"}"#.to_string(),
                r#"{"title":"T","kind":"task"}"#.to_string(),
            ],
        };
        let err = cols.into_events().unwrap_err();
        // The error should point at the offending column.
        assert!(err.contains("agents"), "unexpected error: {err}");
    }

    // === Row accessor =====================================================

    #[test]
    fn row_returns_correct_fields() {
        let events = vec![
            make_create_event(1_000, "alice", "bn-a7x", "Task A"),
            make_move_event(2_000, "bob", "bn-b8y", "blake3:ref"),
        ];
        let cols = CacheColumns::from_events(&events).unwrap();
        let row = cols.row(1).unwrap();
        assert_eq!(row.wall_ts_us, 2_000);
        assert_eq!(row.agent, "bob");
        assert_eq!(row.event_type, EventType::Move);
        assert_eq!(row.item_id, "bn-b8y");
        assert_eq!(row.parents, "blake3:ref");
        assert_eq!(row.itc, "itc:AQ.1");
        assert_eq!(row.value, cols.values[1]);
    }

    #[test]
    fn row_out_of_bounds_returns_none() {
        let cols = CacheColumns::default();
        assert!(cols.row(0).is_none());
    }

    // === Column projections ==============================================

    #[test]
    fn event_types_projection() {
        let events = vec![
            make_create_event(1_000, "alice", "bn-a7x", "Task"),
            make_move_event(2_000, "bob", "bn-a7x", "blake3:abc"),
        ];
        let cols = CacheColumns::from_events(&events).unwrap();
        assert_eq!(cols.event_types(), &[EventType::Create, EventType::Move]);
    }

    #[test]
    fn timestamps_projection() {
        let events = vec![
            make_create_event(100, "a", "bn-a7x", "T"),
            make_create_event(200, "a", "bn-b8y", "U"),
        ];
        let cols = CacheColumns::from_events(&events).unwrap();
        assert_eq!(cols.timestamps(), &[100, 200]);
    }
}