Skip to main content

ubiquisync_core/codec/
writer.rs

1use std::collections::HashMap;
2
3use crate::{codec::error::CodecError, uuid::Uuid};
4
5/// Encodes one log entry's body: accumulates bytes while feeding a rolling
6/// blake3 hash, and deduplicates UUIDs against a dictionary shared across the
7/// segment's entries.
8pub struct EntryBufferWriter<'a> {
9    buf: HashWriter,
10    uuid_dict: &'a mut HashMap<Uuid, u32>,
11}
12
13impl<'a> EntryBufferWriter<'a> {
14    /// Create a writer that shares `uuid_dict` for UUID dictionary compression
15    /// across the entries of one segment.
16    pub fn new(uuid_dict: &'a mut HashMap<Uuid, u32>) -> Self {
17        Self {
18            buf: HashWriter::new(),
19            uuid_dict,
20        }
21    }
22
23    /// Append a single raw byte.
24    pub fn write_byte(&mut self, b: u8) {
25        self.buf.append(&[b]);
26    }
27
28    /// Append `v` as an unsigned varint (7 data bits per byte, little-endian).
29    pub fn write_varint(&mut self, mut v: u64) {
30        loop {
31            let byte = (v & 0x7f) as u8;
32            v >>= 7;
33            if v == 0 {
34                self.write_byte(byte);
35                break;
36            } else {
37                self.write_byte(byte | 0x80);
38            }
39        }
40    }
41
42    /// Append a length-prefixed byte string: a varint length, then the bytes.
43    pub fn write_blob(&mut self, data: &[u8]) {
44        self.write_varint(data.len() as u64);
45        self.buf.append(data);
46    }
47
48    /// Append a `u16` in little-endian order.
49    pub fn write_u16_le(&mut self, v: u16) {
50        self.buf.append(&v.to_le_bytes());
51    }
52
53    /// Append a signed integer as a zigzag-encoded varint.
54    pub fn write_zigzag(&mut self, n: i64) {
55        let encoded = ((n << 1) ^ (n >> 63)) as u64;
56        self.write_varint(encoded);
57    }
58
59    /// Write a UUID using dictionary compression. The canonical hash
60    /// always sees the raw 16 bytes regardless of whether the buffer
61    /// gets a dict reference or an inline literal.
62    pub fn write_uuid(&mut self, data: &Uuid) {
63        // Hash the raw UUID bytes — canonical content identity must be
64        // independent of dictionary state.
65        self.buf._hash_without_append(data);
66
67        if let Some(id) = self.uuid_dict.get(data) {
68            // Known UUID — write dict reference varint to buf only.
69            self._write_varint_without_hash(*id as u64);
70        } else {
71            // First occurrence — write 0 sentinel + raw bytes to buf,
72            // and register in the dictionary for future references.
73            self.buf._append_without_hash(&[0]);
74            self.buf._append_without_hash(data);
75            let id = self.uuid_dict.len() as u32 + 1; // IDs start at 1; 0 is the inline sentinel.
76            self.uuid_dict.insert(*data, id);
77        }
78    }
79
80    fn _write_varint_without_hash(&mut self, mut v: u64) {
81        loop {
82            let byte = (v & 0x7f) as u8;
83            v >>= 7;
84            if v == 0 {
85                self.buf._append_without_hash(&[byte]);
86                break;
87            } else {
88                self.buf._append_without_hash(&[byte | 0x80]);
89            }
90        }
91    }
92
93    /// Write a delta-encoded timestamp. Timestamps within a segment must
94    /// be monotonically non-decreasing.
95    pub fn write_delta(&mut self, current: u64, last: u64) -> Result<(), CodecError> {
96        if current < last {
97            return Err(CodecError::NonMonotonicDelta);
98        }
99        let delta = current - last;
100        self.buf._hash_without_append(&current.to_le_bytes());
101        self._write_varint_without_hash(delta);
102        Ok(())
103    }
104
105    /// Finalize the entry: appends truncated blake3 hash as integrity
106    /// check bytes and returns the encoded buffer plus the full hash.
107    pub fn finalize(self) -> (Vec<u8>, blake3::Hash) {
108        self.buf.finalize()
109    }
110
111    /// Return just the encoded body, **without** the truncated-hash integrity
112    /// trailer that [`finalize`](Self::finalize) appends. Used by callers that
113    /// reuse the wire encoding for storage and supply their own framing — e.g.
114    /// the SQL op-log index codec, which stores the raw `key`/`value` bytes and
115    /// has no use for a per-blob integrity hash.
116    pub fn into_bytes(self) -> Vec<u8> {
117        self.buf.into_bytes()
118    }
119}
120
121struct HashWriter {
122    buf: Vec<u8>,
123    hasher: blake3::Hasher,
124}
125
126impl HashWriter {
127    fn new() -> Self {
128        Self {
129            buf: Vec::new(),
130            hasher: blake3::Hasher::new(),
131        }
132    }
133
134    fn append(&mut self, data: &[u8]) {
135        self.buf.extend_from_slice(data);
136        self.hasher.update(data);
137    }
138
139    /// Write to buf only — skips the hasher. Used for dictionary-compressed
140    /// encodings where the canonical hash sees different bytes than the buf.
141    fn _append_without_hash(&mut self, data: &[u8]) {
142        self.buf.extend_from_slice(data);
143    }
144
145    /// Update the hasher only — skips the buf. Used to feed canonical
146    /// content (e.g. raw UUID bytes) into the hash without writing them.
147    fn _hash_without_append(&mut self, data: &[u8]) {
148        self.hasher.update(data);
149    }
150
151    fn finalize(self) -> (Vec<u8>, blake3::Hash) {
152        let hash = self.hasher.finalize();
153        // append first 4 bytes of the blake3 hash as a truncated-hash integrity check
154        let mut buf = self.buf;
155        buf.extend_from_slice(&hash.as_bytes()[..4]);
156        (buf, hash)
157    }
158
159    /// Return the accumulated buffer without computing or appending the hash.
160    fn into_bytes(self) -> Vec<u8> {
161        self.buf
162    }
163}