Skip to main content

libgrite_git/
chunk.rs

//! CBOR chunk encoding/decoding for portable event storage.
//!
//! Chunk format:
//! - Magic: `GRITCHNK` (8 bytes)
//! - Version: `u16` (little-endian)
//! - Codec length: `u8`
//! - Codec: UTF-8 codec name of exactly "codec length" bytes (currently `cbor-v1`)
//! - Payload: CBOR array of events
9
10use blake2::{Blake2b, Digest};
11use blake2::digest::consts::U32;
12use ciborium::Value;
13use libgrite_core::types::event::{DependencyType, Event, EventKind, IssueState, SymbolInfo};
14use libgrite_core::types::ids::{ActorId, EventId, IssueId};
15
16use crate::GitError;
17
/// Magic bytes at the start of every chunk. `decode_chunk` rejects any input
/// whose first 8 bytes differ from this sequence.
pub const CHUNK_MAGIC: &[u8; 8] = b"GRITCHNK";

/// Current chunk format version, written as a little-endian `u16` directly
/// after the magic. `decode_chunk` accepts only this exact value.
pub const CHUNK_VERSION: u16 = 1;

/// Codec identifier stored in the chunk header, preceded by a one-byte length.
pub const CHUNK_CODEC: &str = "cbor-v1";
26
27/// Encode a list of events into a chunk
28pub fn encode_chunk(events: &[Event]) -> Result<Vec<u8>, GitError> {
29    let mut buf = Vec::new();
30
31    // Magic
32    buf.extend_from_slice(CHUNK_MAGIC);
33
34    // Version (little-endian u16)
35    buf.extend_from_slice(&CHUNK_VERSION.to_le_bytes());
36
37    // Codec length and codec string
38    let codec_bytes = CHUNK_CODEC.as_bytes();
39    buf.push(codec_bytes.len() as u8);
40    buf.extend_from_slice(codec_bytes);
41
42    // Encode events as CBOR array
43    let events_value = events_to_cbor(events);
44    ciborium::into_writer(&events_value, &mut buf)
45        .map_err(|e| GitError::CborDecode(format!("Failed to encode events: {}", e)))?;
46
47    Ok(buf)
48}
49
50/// Decode a chunk into a list of events
51pub fn decode_chunk(data: &[u8]) -> Result<Vec<Event>, GitError> {
52    // Check minimum size
53    if data.len() < 8 + 2 + 1 {
54        return Err(GitError::InvalidChunk("Chunk too small".to_string()));
55    }
56
57    // Verify magic
58    if &data[0..8] != CHUNK_MAGIC {
59        return Err(GitError::InvalidChunk("Invalid magic bytes".to_string()));
60    }
61
62    // Read version
63    let version = u16::from_le_bytes([data[8], data[9]]);
64    if version != CHUNK_VERSION {
65        return Err(GitError::InvalidChunk(format!(
66            "Unsupported chunk version: {}",
67            version
68        )));
69    }
70
71    // Read codec
72    let codec_len = data[10] as usize;
73    if data.len() < 11 + codec_len {
74        return Err(GitError::InvalidChunk("Chunk truncated at codec".to_string()));
75    }
76    let codec = std::str::from_utf8(&data[11..11 + codec_len])
77        .map_err(|_| GitError::InvalidChunk("Invalid codec string".to_string()))?;
78    if codec != CHUNK_CODEC {
79        return Err(GitError::InvalidChunk(format!(
80            "Unsupported codec: {}",
81            codec
82        )));
83    }
84
85    // Parse CBOR payload
86    let payload_start = 11 + codec_len;
87    let value: Value = ciborium::from_reader(&data[payload_start..])
88        .map_err(|e| GitError::CborDecode(format!("Failed to decode CBOR: {}", e)))?;
89
90    cbor_to_events(value)
91}
92
93/// Compute BLAKE2b-256 hash of chunk data
94pub fn chunk_hash(data: &[u8]) -> [u8; 32] {
95    let mut hasher = Blake2b::<U32>::new();
96    hasher.update(data);
97    hasher.finalize().into()
98}
99
100/// Convert events to CBOR value
101fn events_to_cbor(events: &[Event]) -> Value {
102    let events_array: Vec<Value> = events.iter().map(event_to_cbor).collect();
103    Value::Array(events_array)
104}
105
106/// Convert a single event to CBOR
107/// Format: [event_id, issue_id, actor, ts, parent, kind_tag, kind_payload, sig]
108fn event_to_cbor(event: &Event) -> Value {
109    let (kind_tag, kind_payload) = libgrite_core::hash::kind_to_tag_and_payload(&event.kind);
110
111    let parent_value = match &event.parent {
112        Some(p) => Value::Bytes(p.to_vec()),
113        None => Value::Null,
114    };
115
116    let sig_value = match &event.sig {
117        Some(s) => Value::Bytes(s.clone()),
118        None => Value::Null,
119    };
120
121    Value::Array(vec![
122        Value::Bytes(event.event_id.to_vec()),
123        Value::Bytes(event.issue_id.to_vec()),
124        Value::Bytes(event.actor.to_vec()),
125        Value::Integer(event.ts_unix_ms.into()),
126        parent_value,
127        Value::Integer(kind_tag.into()),
128        kind_payload,
129        sig_value,
130    ])
131}
132
133/// Convert CBOR value to events
134fn cbor_to_events(value: Value) -> Result<Vec<Event>, GitError> {
135    let array = match value {
136        Value::Array(arr) => arr,
137        _ => return Err(GitError::InvalidChunk("Expected array of events".to_string())),
138    };
139
140    array.into_iter().map(cbor_to_event).collect()
141}
142
143/// Convert a single CBOR value to an Event
144fn cbor_to_event(value: Value) -> Result<Event, GitError> {
145    let array = match value {
146        Value::Array(arr) => arr,
147        _ => return Err(GitError::InvalidEvent("Expected event array".to_string())),
148    };
149
150    if array.len() != 8 {
151        return Err(GitError::InvalidEvent(format!(
152            "Expected 8 elements, got {}",
153            array.len()
154        )));
155    }
156
157    let mut iter = array.into_iter();
158
159    // event_id
160    let event_id: EventId = extract_bytes(&iter.next().unwrap(), "event_id", 32)?
161        .try_into()
162        .map_err(|_| GitError::InvalidEvent("Invalid event_id length".to_string()))?;
163
164    // issue_id
165    let issue_id: IssueId = extract_bytes(&iter.next().unwrap(), "issue_id", 16)?
166        .try_into()
167        .map_err(|_| GitError::InvalidEvent("Invalid issue_id length".to_string()))?;
168
169    // actor
170    let actor: ActorId = extract_bytes(&iter.next().unwrap(), "actor", 16)?
171        .try_into()
172        .map_err(|_| GitError::InvalidEvent("Invalid actor length".to_string()))?;
173
174    // ts_unix_ms
175    let ts_unix_ms = extract_u64(&iter.next().unwrap(), "ts_unix_ms")?;
176
177    // parent
178    let parent_value = iter.next().unwrap();
179    let parent: Option<EventId> = match parent_value {
180        Value::Null => None,
181        Value::Bytes(b) => {
182            let arr: EventId = b
183                .try_into()
184                .map_err(|_| GitError::InvalidEvent("Invalid parent length".to_string()))?;
185            Some(arr)
186        }
187        _ => return Err(GitError::InvalidEvent("Invalid parent type".to_string())),
188    };
189
190    // kind_tag
191    let kind_tag = extract_u32(&iter.next().unwrap(), "kind_tag")?;
192
193    // kind_payload
194    let kind_payload = iter.next().unwrap();
195
196    // sig
197    let sig_value = iter.next().unwrap();
198    let sig: Option<Vec<u8>> = match sig_value {
199        Value::Null => None,
200        Value::Bytes(b) => Some(b),
201        _ => return Err(GitError::InvalidEvent("Invalid sig type".to_string())),
202    };
203
204    // Parse kind from tag and payload
205    let kind = parse_event_kind(kind_tag, kind_payload)?;
206
207    Ok(Event {
208        event_id,
209        issue_id,
210        actor,
211        ts_unix_ms,
212        parent,
213        kind,
214        sig,
215    })
216}
217
218/// Parse EventKind from tag and payload
219fn parse_event_kind(tag: u32, payload: Value) -> Result<EventKind, GitError> {
220    let array = match payload {
221        Value::Array(arr) => arr,
222        _ => return Err(GitError::InvalidEvent("Expected kind payload array".to_string())),
223    };
224
225    match tag {
226        1 => {
227            // IssueCreated { title, body, labels }
228            if array.len() != 3 {
229                return Err(GitError::InvalidEvent("IssueCreated expects 3 fields".to_string()));
230            }
231            let mut iter = array.into_iter();
232            let title = extract_string(&iter.next().unwrap(), "title")?;
233            let body = extract_string(&iter.next().unwrap(), "body")?;
234            let labels = extract_string_array(&iter.next().unwrap(), "labels")?;
235            Ok(EventKind::IssueCreated { title, body, labels })
236        }
237        2 => {
238            // IssueUpdated { title, body }
239            if array.len() != 2 {
240                return Err(GitError::InvalidEvent("IssueUpdated expects 2 fields".to_string()));
241            }
242            let mut iter = array.into_iter();
243            let title = extract_optional_string(&iter.next().unwrap(), "title")?;
244            let body = extract_optional_string(&iter.next().unwrap(), "body")?;
245            Ok(EventKind::IssueUpdated { title, body })
246        }
247        3 => {
248            // CommentAdded { body }
249            if array.len() != 1 {
250                return Err(GitError::InvalidEvent("CommentAdded expects 1 field".to_string()));
251            }
252            let body = extract_string(&array.into_iter().next().unwrap(), "body")?;
253            Ok(EventKind::CommentAdded { body })
254        }
255        4 => {
256            // LabelAdded { label }
257            if array.len() != 1 {
258                return Err(GitError::InvalidEvent("LabelAdded expects 1 field".to_string()));
259            }
260            let label = extract_string(&array.into_iter().next().unwrap(), "label")?;
261            Ok(EventKind::LabelAdded { label })
262        }
263        5 => {
264            // LabelRemoved { label }
265            if array.len() != 1 {
266                return Err(GitError::InvalidEvent("LabelRemoved expects 1 field".to_string()));
267            }
268            let label = extract_string(&array.into_iter().next().unwrap(), "label")?;
269            Ok(EventKind::LabelRemoved { label })
270        }
271        6 => {
272            // StateChanged { state }
273            if array.len() != 1 {
274                return Err(GitError::InvalidEvent("StateChanged expects 1 field".to_string()));
275            }
276            let state_str = extract_string(&array.into_iter().next().unwrap(), "state")?;
277            let state = match state_str.as_str() {
278                "open" => IssueState::Open,
279                "closed" => IssueState::Closed,
280                _ => return Err(GitError::InvalidEvent(format!("Invalid state: {}", state_str))),
281            };
282            Ok(EventKind::StateChanged { state })
283        }
284        7 => {
285            // LinkAdded { url, note }
286            if array.len() != 2 {
287                return Err(GitError::InvalidEvent("LinkAdded expects 2 fields".to_string()));
288            }
289            let mut iter = array.into_iter();
290            let url = extract_string(&iter.next().unwrap(), "url")?;
291            let note = extract_optional_string(&iter.next().unwrap(), "note")?;
292            Ok(EventKind::LinkAdded { url, note })
293        }
294        8 => {
295            // AssigneeAdded { user }
296            if array.len() != 1 {
297                return Err(GitError::InvalidEvent("AssigneeAdded expects 1 field".to_string()));
298            }
299            let user = extract_string(&array.into_iter().next().unwrap(), "user")?;
300            Ok(EventKind::AssigneeAdded { user })
301        }
302        9 => {
303            // AssigneeRemoved { user }
304            if array.len() != 1 {
305                return Err(GitError::InvalidEvent("AssigneeRemoved expects 1 field".to_string()));
306            }
307            let user = extract_string(&array.into_iter().next().unwrap(), "user")?;
308            Ok(EventKind::AssigneeRemoved { user })
309        }
310        10 => {
311            // AttachmentAdded { name, sha256, mime }
312            if array.len() != 3 {
313                return Err(GitError::InvalidEvent("AttachmentAdded expects 3 fields".to_string()));
314            }
315            let mut iter = array.into_iter();
316            let name = extract_string(&iter.next().unwrap(), "name")?;
317            let sha256: [u8; 32] = extract_bytes(&iter.next().unwrap(), "sha256", 32)?
318                .try_into()
319                .map_err(|_| GitError::InvalidEvent("Invalid sha256 length".to_string()))?;
320            let mime = extract_string(&iter.next().unwrap(), "mime")?;
321            Ok(EventKind::AttachmentAdded { name, sha256, mime })
322        }
323        11 => {
324            // DependencyAdded { target, dep_type }
325            if array.len() != 2 {
326                return Err(GitError::InvalidEvent("DependencyAdded expects 2 fields".to_string()));
327            }
328            let mut iter = array.into_iter();
329            let target: IssueId = extract_bytes(&iter.next().unwrap(), "target", 16)?
330                .try_into()
331                .map_err(|_| GitError::InvalidEvent("Invalid target length".to_string()))?;
332            let dep_type_str = extract_string(&iter.next().unwrap(), "dep_type")?;
333            let dep_type = DependencyType::from_str(&dep_type_str)
334                .ok_or_else(|| GitError::InvalidEvent(format!("Invalid dep_type: {}", dep_type_str)))?;
335            Ok(EventKind::DependencyAdded { target, dep_type })
336        }
337        12 => {
338            // DependencyRemoved { target, dep_type }
339            if array.len() != 2 {
340                return Err(GitError::InvalidEvent("DependencyRemoved expects 2 fields".to_string()));
341            }
342            let mut iter = array.into_iter();
343            let target: IssueId = extract_bytes(&iter.next().unwrap(), "target", 16)?
344                .try_into()
345                .map_err(|_| GitError::InvalidEvent("Invalid target length".to_string()))?;
346            let dep_type_str = extract_string(&iter.next().unwrap(), "dep_type")?;
347            let dep_type = DependencyType::from_str(&dep_type_str)
348                .ok_or_else(|| GitError::InvalidEvent(format!("Invalid dep_type: {}", dep_type_str)))?;
349            Ok(EventKind::DependencyRemoved { target, dep_type })
350        }
351        13 => {
352            // ContextUpdated { path, language, symbols, summary, content_hash }
353            if array.len() != 5 {
354                return Err(GitError::InvalidEvent("ContextUpdated expects 5 fields".to_string()));
355            }
356            let mut iter = array.into_iter();
357            let path = extract_string(&iter.next().unwrap(), "path")?;
358            let language = extract_string(&iter.next().unwrap(), "language")?;
359            let symbols_value = iter.next().unwrap();
360            let symbols = parse_symbols(symbols_value)?;
361            let summary = extract_string(&iter.next().unwrap(), "summary")?;
362            let content_hash: [u8; 32] = extract_bytes(&iter.next().unwrap(), "content_hash", 32)?
363                .try_into()
364                .map_err(|_| GitError::InvalidEvent("Invalid content_hash length".to_string()))?;
365            Ok(EventKind::ContextUpdated { path, language, symbols, summary, content_hash })
366        }
367        14 => {
368            // ProjectContextUpdated { key, value }
369            if array.len() != 2 {
370                return Err(GitError::InvalidEvent("ProjectContextUpdated expects 2 fields".to_string()));
371            }
372            let mut iter = array.into_iter();
373            let key = extract_string(&iter.next().unwrap(), "key")?;
374            let value = extract_string(&iter.next().unwrap(), "value")?;
375            Ok(EventKind::ProjectContextUpdated { key, value })
376        }
377        _ => Err(GitError::InvalidEvent(format!("Unknown kind tag: {}", tag))),
378    }
379}
380
381/// Parse a CBOR array of symbols into Vec<SymbolInfo>
382fn parse_symbols(value: Value) -> Result<Vec<SymbolInfo>, GitError> {
383    let array = match value {
384        Value::Array(arr) => arr,
385        _ => return Err(GitError::InvalidEvent("symbols must be array".to_string())),
386    };
387    array.into_iter().map(|sym_value| {
388        let sym_arr = match sym_value {
389            Value::Array(arr) => arr,
390            _ => return Err(GitError::InvalidEvent("symbol must be array".to_string())),
391        };
392        if sym_arr.len() != 4 {
393            return Err(GitError::InvalidEvent("symbol expects 4 fields".to_string()));
394        }
395        let mut iter = sym_arr.into_iter();
396        let name = extract_string(&iter.next().unwrap(), "symbol.name")?;
397        let kind = extract_string(&iter.next().unwrap(), "symbol.kind")?;
398        let line_start = extract_u32(&iter.next().unwrap(), "symbol.line_start")?;
399        let line_end = extract_u32(&iter.next().unwrap(), "symbol.line_end")?;
400        Ok(SymbolInfo { name, kind, line_start, line_end })
401    }).collect()
402}
403
404// Helper functions for extracting values from CBOR
405
406fn extract_bytes(value: &Value, field: &str, expected_len: usize) -> Result<Vec<u8>, GitError> {
407    match value {
408        Value::Bytes(b) => {
409            if b.len() != expected_len {
410                return Err(GitError::InvalidEvent(format!(
411                    "{} has wrong length: expected {}, got {}",
412                    field,
413                    expected_len,
414                    b.len()
415                )));
416            }
417            Ok(b.clone())
418        }
419        _ => Err(GitError::InvalidEvent(format!("{} must be bytes", field))),
420    }
421}
422
423fn extract_u64(value: &Value, field: &str) -> Result<u64, GitError> {
424    match value {
425        Value::Integer(i) => {
426            let n: i128 = (*i).into();
427            if n < 0 || n > u64::MAX as i128 {
428                return Err(GitError::InvalidEvent(format!("{} out of range", field)));
429            }
430            Ok(n as u64)
431        }
432        _ => Err(GitError::InvalidEvent(format!("{} must be integer", field))),
433    }
434}
435
436fn extract_u32(value: &Value, field: &str) -> Result<u32, GitError> {
437    match value {
438        Value::Integer(i) => {
439            let n: i128 = (*i).into();
440            if n < 0 || n > u32::MAX as i128 {
441                return Err(GitError::InvalidEvent(format!("{} out of range", field)));
442            }
443            Ok(n as u32)
444        }
445        _ => Err(GitError::InvalidEvent(format!("{} must be integer", field))),
446    }
447}
448
449fn extract_string(value: &Value, field: &str) -> Result<String, GitError> {
450    match value {
451        Value::Text(s) => Ok(s.clone()),
452        _ => Err(GitError::InvalidEvent(format!("{} must be string", field))),
453    }
454}
455
456fn extract_optional_string(value: &Value, field: &str) -> Result<Option<String>, GitError> {
457    match value {
458        Value::Null => Ok(None),
459        Value::Text(s) => Ok(Some(s.clone())),
460        _ => Err(GitError::InvalidEvent(format!(
461            "{} must be string or null",
462            field
463        ))),
464    }
465}
466
467fn extract_string_array(value: &Value, field: &str) -> Result<Vec<String>, GitError> {
468    match value {
469        Value::Array(arr) => {
470            arr.iter()
471                .map(|v| extract_string(v, field))
472                .collect()
473        }
474        _ => Err(GitError::InvalidEvent(format!("{} must be array", field))),
475    }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use libgrite_core::hash::compute_event_id;
    use libgrite_core::types::ids::generate_issue_id;

    /// Build an event of the given kind on a freshly generated issue, using a
    /// fixed actor and timestamp and `None` for the parent argument.
    fn make_test_event(kind: EventKind) -> Event {
        const TS_UNIX_MS: u64 = 1700000000000;
        let issue_id = generate_issue_id();
        let actor = [1u8; 16];
        let event_id = compute_event_id(&issue_id, &actor, TS_UNIX_MS, None, &kind);
        Event::new(event_id, issue_id, actor, TS_UNIX_MS, None, kind)
    }

    /// Minimal `IssueCreated` kind with the given title and body and no labels.
    fn unlabeled_issue(title: &str, body: &str) -> EventKind {
        EventKind::IssueCreated {
            title: title.to_string(),
            body: body.to_string(),
            labels: vec![],
        }
    }

    #[test]
    fn test_chunk_roundtrip_issue_created() {
        let original = make_test_event(EventKind::IssueCreated {
            title: "Test Issue".to_string(),
            body: "Test body".to_string(),
            labels: vec!["bug".to_string(), "p0".to_string()],
        });

        let chunk = encode_chunk(&[original.clone()]).unwrap();

        // The chunk must open with the magic bytes.
        assert_eq!(&chunk[0..8], CHUNK_MAGIC);

        // Decoding yields exactly the one event that went in.
        let decoded = decode_chunk(&chunk).unwrap();
        assert_eq!(decoded.len(), 1);
        let restored = &decoded[0];
        assert_eq!(restored.event_id, original.event_id);
        assert_eq!(restored.issue_id, original.issue_id);
        assert_eq!(restored.actor, original.actor);
        assert_eq!(restored.ts_unix_ms, original.ts_unix_ms);

        match &restored.kind {
            EventKind::IssueCreated { title, body, labels } => {
                assert_eq!(title, "Test Issue");
                assert_eq!(body, "Test body");
                assert!(labels.contains(&"bug".to_string()));
                assert!(labels.contains(&"p0".to_string()));
            }
            _ => panic!("Wrong event kind"),
        }
    }

    #[test]
    fn test_chunk_roundtrip_all_kinds() {
        // One representative of every event kind, in tag order.
        let kinds = vec![
            unlabeled_issue("Test", "Body"),
            EventKind::IssueUpdated {
                title: Some("New Title".to_string()),
                body: None,
            },
            EventKind::CommentAdded {
                body: "A comment".to_string(),
            },
            EventKind::LabelAdded {
                label: "bug".to_string(),
            },
            EventKind::LabelRemoved {
                label: "wip".to_string(),
            },
            EventKind::StateChanged {
                state: IssueState::Closed,
            },
            EventKind::LinkAdded {
                url: "https://example.com".to_string(),
                note: Some("ref".to_string()),
            },
            EventKind::AssigneeAdded {
                user: "alice".to_string(),
            },
            EventKind::AssigneeRemoved {
                user: "bob".to_string(),
            },
            EventKind::AttachmentAdded {
                name: "file.txt".to_string(),
                sha256: [0u8; 32],
                mime: "text/plain".to_string(),
            },
            EventKind::DependencyAdded {
                target: [0xAA; 16],
                dep_type: DependencyType::Blocks,
            },
            EventKind::DependencyRemoved {
                target: [0xBB; 16],
                dep_type: DependencyType::DependsOn,
            },
            EventKind::ContextUpdated {
                path: "src/main.rs".to_string(),
                language: "rust".to_string(),
                symbols: vec![SymbolInfo {
                    name: "main".to_string(),
                    kind: "function".to_string(),
                    line_start: 1,
                    line_end: 10,
                }],
                summary: "Entry point".to_string(),
                content_hash: [0xCC; 32],
            },
            EventKind::ProjectContextUpdated {
                key: "framework".to_string(),
                value: "actix-web".to_string(),
            },
        ];
        let events: Vec<Event> = kinds.into_iter().map(make_test_event).collect();

        let chunk = encode_chunk(&events).unwrap();
        let decoded = decode_chunk(&chunk).unwrap();

        assert_eq!(decoded.len(), events.len());
        for (expected, actual) in events.iter().zip(&decoded) {
            assert_eq!(expected.event_id, actual.event_id);
            assert_eq!(expected.kind, actual.kind);
        }
    }

    #[test]
    fn test_chunk_hash_deterministic() {
        let event = make_test_event(unlabeled_issue("Test", "Body"));

        // Encoding the same event twice must give identical bytes and hashes.
        let first = encode_chunk(&[event.clone()]).unwrap();
        let second = encode_chunk(&[event]).unwrap();

        assert_eq!(chunk_hash(&first), chunk_hash(&second));
    }

    #[test]
    fn test_invalid_chunk_magic() {
        let data = b"BADMAGIC\x01\x00\x07cbor-v1";
        assert!(matches!(
            decode_chunk(data),
            Err(GitError::InvalidChunk(_))
        ));
    }

    #[test]
    fn test_invalid_chunk_version() {
        // Valid header except for the version field.
        let mut data = CHUNK_MAGIC.to_vec();
        data.extend_from_slice(&99u16.to_le_bytes());
        data.push(7);
        data.extend_from_slice(b"cbor-v1");

        assert!(matches!(
            decode_chunk(&data),
            Err(GitError::InvalidChunk(_))
        ));
    }
}
630}