ubiquisync_core/codec/writer.rs
1use std::collections::HashMap;
2
3use crate::{codec::error::CodecError, uuid::Uuid};
4
/// Encodes one log entry's body: accumulates bytes while feeding a rolling
/// blake3 hash, and deduplicates UUIDs against a dictionary shared across the
/// segment's entries.
///
/// The lifetime `'a` is the mutable borrow of that shared dictionary, so only
/// one writer can use a given dictionary at a time.
pub struct EntryBufferWriter<'a> {
    /// Encoded output bytes together with the running hasher for this entry.
    buf: HashWriter,
    /// Maps every UUID that has already been written inline to its dictionary
    /// id. Ids start at 1; 0 is reserved as the "inline literal follows"
    /// sentinel in the encoded stream.
    uuid_dict: &'a mut HashMap<Uuid, u32>,
}
12
13impl<'a> EntryBufferWriter<'a> {
14 /// Create a writer that shares `uuid_dict` for UUID dictionary compression
15 /// across the entries of one segment.
16 pub fn new(uuid_dict: &'a mut HashMap<Uuid, u32>) -> Self {
17 Self {
18 buf: HashWriter::new(),
19 uuid_dict,
20 }
21 }
22
23 /// Append a single raw byte.
24 pub fn write_byte(&mut self, b: u8) {
25 self.buf.append(&[b]);
26 }
27
28 /// Append `v` as an unsigned varint (7 data bits per byte, little-endian).
29 pub fn write_varint(&mut self, mut v: u64) {
30 loop {
31 let byte = (v & 0x7f) as u8;
32 v >>= 7;
33 if v == 0 {
34 self.write_byte(byte);
35 break;
36 } else {
37 self.write_byte(byte | 0x80);
38 }
39 }
40 }
41
42 /// Append a length-prefixed byte string: a varint length, then the bytes.
43 pub fn write_blob(&mut self, data: &[u8]) {
44 self.write_varint(data.len() as u64);
45 self.buf.append(data);
46 }
47
48 /// Append a `u16` in little-endian order.
49 pub fn write_u16_le(&mut self, v: u16) {
50 self.buf.append(&v.to_le_bytes());
51 }
52
53 /// Append a signed integer as a zigzag-encoded varint.
54 pub fn write_zigzag(&mut self, n: i64) {
55 let encoded = ((n << 1) ^ (n >> 63)) as u64;
56 self.write_varint(encoded);
57 }
58
59 /// Write a UUID using dictionary compression. The canonical hash
60 /// always sees the raw 16 bytes regardless of whether the buffer
61 /// gets a dict reference or an inline literal.
62 pub fn write_uuid(&mut self, data: &Uuid) {
63 // Hash the raw UUID bytes — canonical content identity must be
64 // independent of dictionary state.
65 self.buf._hash_without_append(data);
66
67 if let Some(id) = self.uuid_dict.get(data) {
68 // Known UUID — write dict reference varint to buf only.
69 self._write_varint_without_hash(*id as u64);
70 } else {
71 // First occurrence — write 0 sentinel + raw bytes to buf,
72 // and register in the dictionary for future references.
73 self.buf._append_without_hash(&[0]);
74 self.buf._append_without_hash(data);
75 let id = self.uuid_dict.len() as u32 + 1; // IDs start at 1; 0 is the inline sentinel.
76 self.uuid_dict.insert(*data, id);
77 }
78 }
79
80 fn _write_varint_without_hash(&mut self, mut v: u64) {
81 loop {
82 let byte = (v & 0x7f) as u8;
83 v >>= 7;
84 if v == 0 {
85 self.buf._append_without_hash(&[byte]);
86 break;
87 } else {
88 self.buf._append_without_hash(&[byte | 0x80]);
89 }
90 }
91 }
92
93 /// Write a delta-encoded timestamp. Timestamps within a segment must
94 /// be monotonically non-decreasing.
95 pub fn write_delta(&mut self, current: u64, last: u64) -> Result<(), CodecError> {
96 if current < last {
97 return Err(CodecError::NonMonotonicDelta);
98 }
99 let delta = current - last;
100 self.buf._hash_without_append(¤t.to_le_bytes());
101 self._write_varint_without_hash(delta);
102 Ok(())
103 }
104
105 /// Finalize the entry: appends truncated blake3 hash as integrity
106 /// check bytes and returns the encoded buffer plus the full hash.
107 pub fn finalize(self) -> (Vec<u8>, blake3::Hash) {
108 self.buf.finalize()
109 }
110
111 /// Return just the encoded body, **without** the truncated-hash integrity
112 /// trailer that [`finalize`](Self::finalize) appends. Used by callers that
113 /// reuse the wire encoding for storage and supply their own framing — e.g.
114 /// the SQL op-log index codec, which stores the raw `key`/`value` bytes and
115 /// has no use for a per-blob integrity hash.
116 pub fn into_bytes(self) -> Vec<u8> {
117 self.buf.into_bytes()
118 }
119}
120
/// A byte buffer paired with a blake3 hasher.
///
/// The two are fed independently where needed, so the hashed byte stream can
/// differ from the bytes that end up in the buffer.
struct HashWriter {
    /// Encoded bytes accumulated so far.
    buf: Vec<u8>,
    /// Running blake3 state over everything hashed so far.
    hasher: blake3::Hasher,
}
125
126impl HashWriter {
127 fn new() -> Self {
128 Self {
129 buf: Vec::new(),
130 hasher: blake3::Hasher::new(),
131 }
132 }
133
134 fn append(&mut self, data: &[u8]) {
135 self.buf.extend_from_slice(data);
136 self.hasher.update(data);
137 }
138
139 /// Write to buf only — skips the hasher. Used for dictionary-compressed
140 /// encodings where the canonical hash sees different bytes than the buf.
141 fn _append_without_hash(&mut self, data: &[u8]) {
142 self.buf.extend_from_slice(data);
143 }
144
145 /// Update the hasher only — skips the buf. Used to feed canonical
146 /// content (e.g. raw UUID bytes) into the hash without writing them.
147 fn _hash_without_append(&mut self, data: &[u8]) {
148 self.hasher.update(data);
149 }
150
151 fn finalize(self) -> (Vec<u8>, blake3::Hash) {
152 let hash = self.hasher.finalize();
153 // append first 4 bytes of the blake3 hash as a truncated-hash integrity check
154 let mut buf = self.buf;
155 buf.extend_from_slice(&hash.as_bytes()[..4]);
156 (buf, hash)
157 }
158
159 /// Return the accumulated buffer without computing or appending the hash.
160 fn into_bytes(self) -> Vec<u8> {
161 self.buf
162 }
163}