ubl_codec/
binary.rs

1//! Binary TLV codec (varint + frames) for LogLine Workspace.
2//!
3//! Provides:
4//! - **Varint (u64)**: base-128 encoding with MSB as continuation bit
5//! - **TLV**: Tag (u8) + optional Length (varint) + Value (bytes)
6//! - **Frames**: `typ (u8) + len (varint) + payload`
7//!
8//! Fixed-size types (`CID32`, `PUBKEY32`, `SIG64`) don't carry length (known size).
9//!
10//! ## Security Limits
11//!
12//! - [`MAX_FRAME_LEN`]: Hard limit on frame payload (~1 MiB) to prevent DoS
13//! - [`MAX_VARINT_BYTES`]: Maximum bytes for varint encoding (10) to prevent overflow
14
15use ubl_types::{Cid32, PublicKeyBytes, SignatureBytes};
16use thiserror::Error;
17
18// ══════════════════════════════════════════════════════════════════════════════
19// Security Limits (DoS guards)
20// ══════════════════════════════════════════════════════════════════════════════
21
/// Upper bound, in bytes, on a frame payload: exactly 1 MiB (1,048,576 bytes).
///
/// Acts as a DoS guard: a declared length above this value is rejected before
/// any payload is touched.
pub const MAX_FRAME_LEN: usize = 1024 * 1024;

/// Longest possible varint encoding of a `u64`, in bytes (`ceil(64 / 7) = 10`).
pub const MAX_VARINT_BYTES: usize = 10;

/// Upper bound, in bytes, on a variable-length TLV field.
///
/// Defined as [`MAX_FRAME_LEN`], so both limits always agree.
pub const MAX_BYTES_LEN: usize = MAX_FRAME_LEN;
30
31// ══════════════════════════════════════════════════════════════════════════════
32// Canonical Tags (0x00–0x3F: primitives; 0x40–0x7F: reserved; 0x80+ vendor)
33// ══════════════════════════════════════════════════════════════════════════════
34
/// Tag byte: variable-length byte string (varint length prefix follows the tag).
pub const T_BYTES: u8 = 0x01;
/// Tag byte: UTF-8 string (varint length prefix follows the tag).
pub const T_STR: u8 = 0x02;
/// Tag byte: `u64` encoded as a base-128 varint.
pub const T_U64: u8 = 0x03;
/// Tag byte: fixed 32-byte CID (BLAKE3); no length prefix.
pub const T_CID32: u8 = 0x10;
/// Tag byte: fixed 32-byte Ed25519 public key; no length prefix.
pub const T_PUBKEY32: u8 = 0x11;
/// Tag byte: fixed 64-byte Ed25519 signature; no length prefix.
pub const T_SIG64: u8 = 0x12;
47
48// ══════════════════════════════════════════════════════════════════════════════
49// Errors
50// ══════════════════════════════════════════════════════════════════════════════
51
52/// Errors from binary codec operations.
53#[derive(Debug, Error)]
54pub enum BinaryCodecError {
55    /// Unexpected end of input.
56    #[error("unexpected EOF")]
57    Eof,
58    /// Malformed varint encoding.
59    #[error("malformed varint")]
60    Varint,
61    /// Varint exceeds maximum allowed bytes.
62    #[error("varint overflow: exceeded {MAX_VARINT_BYTES} bytes")]
63    VarintOverflow,
64    /// Declared size exceeds maximum allowed.
65    #[error("size limit exceeded: {declared} > {MAX_FRAME_LEN}")]
66    SizeLimit {
67        /// Declared size that exceeded the limit.
68        declared: usize,
69    },
70    /// Invalid length for fixed-size field.
71    #[error("invalid length")]
72    Length,
73    /// Unexpected tag encountered.
74    #[error("unexpected tag: got {got:#04x}, expected {expected:#04x}")]
75    Tag {
76        /// Tag that was found.
77        got: u8,
78        /// Tag that was expected.
79        expected: u8,
80    },
81    /// Invalid UTF-8 in string.
82    #[error("invalid UTF-8")]
83    Utf8,
84}
85
86// ══════════════════════════════════════════════════════════════════════════════
87// Varint encoding/decoding
88// ══════════════════════════════════════════════════════════════════════════════
89
/// Appends the base-128 varint encoding of `x` to `out`.
///
/// The value is emitted least-significant 7-bit group first. Every byte except
/// the last has its high bit (`0x80`) set as a continuation marker, so the
/// output is 1 to 10 bytes long. Existing contents of `out` are left untouched.
#[inline]
pub fn encode_varint_u64(mut x: u64, out: &mut Vec<u8>) {
    loop {
        let low7 = (x & 0x7F) as u8;
        x >>= 7;
        if x == 0 {
            // Final group: continuation bit stays clear.
            out.push(low7);
            return;
        }
        out.push(low7 | 0x80);
    }
}
99
100/// Decodes a base-128 varint from `input` starting at `pos`, advancing `pos`.
101///
102/// # Errors
103///
104/// - `BinaryCodecError::Eof` if input ends prematurely
105/// - `BinaryCodecError::VarintOverflow` if exceeds [`MAX_VARINT_BYTES`]
106/// - `BinaryCodecError::Varint` if encoding is malformed (shift > 63)
107#[inline]
108pub fn decode_varint_u64(input: &[u8], pos: &mut usize) -> Result<u64, BinaryCodecError> {
109    let mut shift = 0u32;
110    let mut result: u64 = 0;
111    let mut bytes_read = 0usize;
112    loop {
113        // Guard: EOF check
114        if *pos >= input.len() {
115            return Err(BinaryCodecError::Eof);
116        }
117        // Guard: varint byte limit
118        bytes_read += 1;
119        if bytes_read > MAX_VARINT_BYTES {
120            return Err(BinaryCodecError::VarintOverflow);
121        }
122        
123        let b = input[*pos];
124        *pos += 1;
125        let val = (b & 0x7F) as u64;
126        result |= val << shift;
127        if (b & 0x80) == 0 {
128            return Ok(result);
129        }
130        shift += 7;
131        if shift > 63 {
132            return Err(BinaryCodecError::Varint);
133        }
134    }
135}
136
137// ══════════════════════════════════════════════════════════════════════════════
138// Encoder
139// ══════════════════════════════════════════════════════════════════════════════
140
141/// Binary TLV encoder.
142#[derive(Default)]
143pub struct Encoder {
144    buf: Vec<u8>,
145}
146
147impl Encoder {
148    /// Creates a new empty encoder.
149    #[must_use]
150    pub fn new() -> Self {
151        Self { buf: Vec::new() }
152    }
153
154    /// Creates an encoder with pre-allocated capacity.
155    #[must_use]
156    pub fn with_capacity(cap: usize) -> Self {
157        Self {
158            buf: Vec::with_capacity(cap),
159        }
160    }
161
162    /// Returns the encoded bytes, consuming the encoder.
163    #[must_use]
164    pub fn finish(self) -> Vec<u8> {
165        self.buf
166    }
167
168    /// Returns the encoded bytes as a slice.
169    #[must_use]
170    pub fn as_slice(&self) -> &[u8] {
171        &self.buf
172    }
173
174    /// Clears the buffer for reuse.
175    pub fn clear(&mut self) {
176        self.buf.clear();
177    }
178
179    /// Returns current length.
180    #[must_use]
181    pub fn len(&self) -> usize {
182        self.buf.len()
183    }
184
185    /// Returns true if empty.
186    #[must_use]
187    pub fn is_empty(&self) -> bool {
188        self.buf.is_empty()
189    }
190
191    #[inline]
192    fn tag(&mut self, t: u8) {
193        self.buf.push(t);
194    }
195
196    #[inline]
197    fn write_len(&mut self, n: usize) {
198        encode_varint_u64(n as u64, &mut self.buf);
199    }
200
201    /// Encodes a `u64` as tagged varint.
202    pub fn u64(&mut self, v: u64) {
203        self.tag(T_U64);
204        encode_varint_u64(v, &mut self.buf);
205    }
206
207    /// Encodes raw bytes with length prefix.
208    pub fn bytes(&mut self, b: &[u8]) {
209        self.tag(T_BYTES);
210        self.write_len(b.len());
211        self.buf.extend_from_slice(b);
212    }
213
214    /// Encodes a UTF-8 string with length prefix.
215    pub fn str(&mut self, s: &str) {
216        self.tag(T_STR);
217        self.write_len(s.len());
218        self.buf.extend_from_slice(s.as_bytes());
219    }
220
221    /// Encodes a 32-byte CID (no length prefix, fixed size).
222    pub fn cid32(&mut self, cid: &Cid32) {
223        self.tag(T_CID32);
224        self.buf.extend_from_slice(&cid.0);
225    }
226
227    /// Encodes a 32-byte public key (no length prefix, fixed size).
228    pub fn public_key(&mut self, pk: &PublicKeyBytes) {
229        self.tag(T_PUBKEY32);
230        self.buf.extend_from_slice(&pk.0);
231    }
232
233    /// Encodes a 64-byte signature (no length prefix, fixed size).
234    pub fn signature(&mut self, sig: &SignatureBytes) {
235        self.tag(T_SIG64);
236        self.buf.extend_from_slice(&sig.0);
237    }
238}
239
240impl core::fmt::Debug for Encoder {
241    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
242        write!(f, "Encoder(len={})", self.buf.len())
243    }
244}
245
246// ══════════════════════════════════════════════════════════════════════════════
247// Decoder
248// ══════════════════════════════════════════════════════════════════════════════
249
250/// Binary TLV decoder.
251pub struct Decoder<'a> {
252    data: &'a [u8],
253    pos: usize,
254}
255
256impl<'a> Decoder<'a> {
257    /// Creates a new decoder from a byte slice.
258    #[must_use]
259    pub fn new(data: &'a [u8]) -> Self {
260        Self { data, pos: 0 }
261    }
262
263    /// Returns the current position.
264    #[must_use]
265    pub fn position(&self) -> usize {
266        self.pos
267    }
268
269    /// Returns true if all data has been consumed.
270    #[must_use]
271    pub fn is_done(&self) -> bool {
272        self.pos >= self.data.len()
273    }
274
275    /// Returns remaining bytes.
276    #[must_use]
277    pub fn remaining(&self) -> usize {
278        self.data.len().saturating_sub(self.pos)
279    }
280
281    #[inline]
282    fn need(&self, n: usize) -> Result<(), BinaryCodecError> {
283        if self.pos + n <= self.data.len() {
284            Ok(())
285        } else {
286            Err(BinaryCodecError::Eof)
287        }
288    }
289
290    #[inline]
291    fn take(&mut self, n: usize) -> Result<&'a [u8], BinaryCodecError> {
292        self.need(n)?;
293        let s = &self.data[self.pos..self.pos + n];
294        self.pos += n;
295        Ok(s)
296    }
297
298    #[inline]
299    fn read_tag(&mut self, expected: u8) -> Result<(), BinaryCodecError> {
300        let got = *self.data.get(self.pos).ok_or(BinaryCodecError::Eof)?;
301        self.pos += 1;
302        if got == expected {
303            Ok(())
304        } else {
305            Err(BinaryCodecError::Tag { got, expected })
306        }
307    }
308
309    /// Decodes a tagged `u64`.
310    ///
311    /// # Errors
312    ///
313    /// Returns error if tag mismatch or malformed varint.
314    pub fn u64(&mut self) -> Result<u64, BinaryCodecError> {
315        self.read_tag(T_U64)?;
316        decode_varint_u64(self.data, &mut self.pos)
317    }
318
319    /// Decodes tagged bytes.
320    ///
321    /// # Errors
322    ///
323    /// - `BinaryCodecError::Tag` if tag mismatch
324    /// - `BinaryCodecError::SizeLimit` if length exceeds [`MAX_BYTES_LEN`]
325    /// - `BinaryCodecError::Eof` if insufficient data
326    pub fn bytes(&mut self) -> Result<&'a [u8], BinaryCodecError> {
327        self.read_tag(T_BYTES)?;
328        let len = decode_varint_u64(self.data, &mut self.pos)? as usize;
329        if len > MAX_BYTES_LEN {
330            return Err(BinaryCodecError::SizeLimit { declared: len });
331        }
332        self.take(len)
333    }
334
335    /// Decodes a tagged UTF-8 string.
336    ///
337    /// # Errors
338    ///
339    /// - `BinaryCodecError::Tag` if tag mismatch
340    /// - `BinaryCodecError::SizeLimit` if length exceeds [`MAX_BYTES_LEN`]
341    /// - `BinaryCodecError::Eof` if insufficient data
342    /// - `BinaryCodecError::Utf8` if invalid UTF-8
343    pub fn str(&mut self) -> Result<&'a str, BinaryCodecError> {
344        self.read_tag(T_STR)?;
345        let len = decode_varint_u64(self.data, &mut self.pos)? as usize;
346        if len > MAX_BYTES_LEN {
347            return Err(BinaryCodecError::SizeLimit { declared: len });
348        }
349        let b = self.take(len)?;
350        core::str::from_utf8(b).map_err(|_| BinaryCodecError::Utf8)
351    }
352
353    /// Decodes a 32-byte CID.
354    ///
355    /// # Errors
356    ///
357    /// Returns error if tag mismatch or insufficient data.
358    pub fn cid32(&mut self) -> Result<Cid32, BinaryCodecError> {
359        self.read_tag(T_CID32)?;
360        let b = self.take(32)?;
361        let mut out = [0u8; 32];
362        out.copy_from_slice(b);
363        Ok(Cid32(out))
364    }
365
366    /// Decodes a 32-byte public key.
367    ///
368    /// # Errors
369    ///
370    /// Returns error if tag mismatch or insufficient data.
371    pub fn public_key(&mut self) -> Result<PublicKeyBytes, BinaryCodecError> {
372        self.read_tag(T_PUBKEY32)?;
373        let b = self.take(32)?;
374        let mut out = [0u8; 32];
375        out.copy_from_slice(b);
376        Ok(PublicKeyBytes(out))
377    }
378
379    /// Decodes a 64-byte signature.
380    ///
381    /// # Errors
382    ///
383    /// Returns error if tag mismatch or insufficient data.
384    pub fn signature(&mut self) -> Result<SignatureBytes, BinaryCodecError> {
385        self.read_tag(T_SIG64)?;
386        let b = self.take(64)?;
387        let mut out = [0u8; 64];
388        out.copy_from_slice(b);
389        Ok(SignatureBytes(out))
390    }
391}
392
393// ══════════════════════════════════════════════════════════════════════════════
394// Frame encoding/decoding
395// ══════════════════════════════════════════════════════════════════════════════
396
397/// Encodes a frame: `typ (u8) + len (varint) + payload`.
398#[must_use]
399pub fn encode_frame(typ: u8, payload: &[u8]) -> Vec<u8> {
400    let mut out = Vec::with_capacity(1 + 10 + payload.len());
401    out.push(typ);
402    encode_varint_u64(payload.len() as u64, &mut out);
403    out.extend_from_slice(payload);
404    out
405}
406
407/// Decodes a frame, returning `(typ, payload)`.
408///
409/// # Errors
410///
411/// - `BinaryCodecError::Eof` if input is empty or truncated
412/// - `BinaryCodecError::SizeLimit` if declared length exceeds [`MAX_FRAME_LEN`]
413pub fn decode_frame(buf: &[u8]) -> Result<(u8, &[u8]), BinaryCodecError> {
414    if buf.is_empty() {
415        return Err(BinaryCodecError::Eof);
416    }
417    let typ = buf[0];
418    let mut pos = 1usize;
419    let len = decode_varint_u64(buf, &mut pos)? as usize;
420    
421    // Security: reject frames exceeding size limit
422    if len > MAX_FRAME_LEN {
423        return Err(BinaryCodecError::SizeLimit { declared: len });
424    }
425    
426    if pos + len > buf.len() {
427        return Err(BinaryCodecError::Eof);
428    }
429    Ok((typ, &buf[pos..pos + len]))
430}
431
#[cfg(test)]
mod tests {
    use super::*;

    /// Each sample must survive encode → decode, and decoding must consume
    /// exactly the bytes that encoding produced.
    #[test]
    fn varint_roundtrip() {
        let samples = [
            0u64,
            1,
            127,
            128,
            255,
            256,
            16_384,
            u64::from(u32::MAX),
            u64::MAX,
        ];
        for sample in samples.iter().copied() {
            let mut encoded = Vec::new();
            encode_varint_u64(sample, &mut encoded);

            let mut cursor = 0usize;
            let decoded = decode_varint_u64(&encoded, &mut cursor).unwrap();

            assert_eq!(decoded, sample);
            assert_eq!(cursor, encoded.len());
        }
    }

    /// A frame decodes back to the type byte and payload it was built from.
    #[test]
    fn frame_roundtrip() {
        let payload = [1u8, 2, 3, 4, 5, 6, 7, 8, 9];
        let frame = encode_frame(0x42, &payload);

        let (typ, body) = decode_frame(&frame).unwrap();

        assert_eq!(typ, 0x42);
        assert_eq!(body, &payload);
    }

    /// One field of every supported kind, written and read back in the same order.
    #[test]
    fn encoder_decoder_roundtrip() {
        let cid = Cid32([0xAB; 32]);
        let pk = PublicKeyBytes([0x22; 32]);
        let sig = SignatureBytes([0x33; 64]);

        let buf = {
            let mut enc = Encoder::new();
            enc.cid32(&cid);
            enc.public_key(&pk);
            enc.signature(&sig);
            enc.str("hello");
            enc.u64(42);
            enc.bytes(b"raw");
            enc.finish()
        };

        let mut dec = Decoder::new(&buf);
        let cid_out = dec.cid32().unwrap();
        let pk_out = dec.public_key().unwrap();
        let sig_out = dec.signature().unwrap();
        let text = dec.str().unwrap();
        let number = dec.u64().unwrap();
        let raw = dec.bytes().unwrap();

        assert!(dec.is_done());
        assert_eq!(cid_out.0, cid.0);
        assert_eq!(pk_out.0, pk.0);
        assert_eq!(sig_out.0, sig.0);
        assert_eq!(text, "hello");
        assert_eq!(number, 42);
        assert_eq!(raw, b"raw");
    }
}
499}