darkbio_cobs/
lib.rs

1// cobs-rs: fast cobs encoder and decoder
2// Copyright 2025 Dark Bio AG. All rights reserved.
3
/// Error types that can be returned from encoding.
#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
pub enum EncodeError {
    /// The output buffer handed to the encoder is shorter than the size it
    /// requires. `have` is the length of the supplied buffer and `want` is the
    /// required length, as computed by `encode_buffer` for the input.
    #[error("buffer too small: have {have} bytes, want {want} bytes")]
    BufferTooSmall { have: usize, want: usize },
}
10
/// Error types that can be returned from decoding.
#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
pub enum DecodeError {
    /// The input slice was empty; even an empty payload encodes to one byte.
    #[error("empty input")]
    EmptyInput,
    /// The output buffer handed to the decoder is shorter than the size it
    /// requires. `have` is the length of the supplied buffer and `want` is the
    /// required length, as computed by `decode_buffer` for the input.
    #[error("buffer too small: have {have} bytes, want {want} bytes")]
    BufferTooSmall { have: usize, want: usize },
    /// A chunk marker (the length byte that opens a chunk) was zero. `at` is
    /// the index of that byte in the encoded input.
    #[error("zero marker at position {at}")]
    ZeroMarker { at: usize },
    /// A zero byte was found inside the payload of a chunk. `at` is the index
    /// of that byte in the encoded input.
    #[error("zero byte in data at position {at}")]
    ZeroBinary { at: usize },
    /// A chunk marker announced more payload bytes than the input has left.
    /// `at` is the index of the marker, `marker` its value and `len` the total
    /// length of the encoded input.
    #[error("chunk overflow at position {at}: chunk {marker} exceeds data length {len}")]
    ChunkOverflow { at: usize, marker: u8, len: usize },
}
25
/// Returns an upper bound on the number of bytes needed to COBS encode an
/// arbitrary input of `size` bytes.
///
/// The bound consists of the payload itself, one marker byte for every started
/// run of 254 payload bytes, and one extra byte (which also covers the single
/// `0x01` byte that an empty input encodes to).
#[inline]
pub const fn encode_buffer(size: usize) -> usize {
    // A marker can describe at most 254 payload bytes
    let markers = size.div_ceil(254);
    size + markers + 1
}
31
/// Returns an upper bound on the number of bytes needed to COBS decode an
/// arbitrary encoded input of `size` bytes.
#[inline]
pub const fn decode_buffer(size: usize) -> usize {
    // A zero length COBS stream is invalid. Panicking on it would make call
    // sites brittle when sizing buffers for potentially malicious input, and
    // returning an error would make this helper much uglier to use. Clamping
    // to zero is safe: the caller can still allocate a zero-byte buffer and
    // the decoder will reject the empty input anyway.
    size.saturating_sub(1)
}
45
46/// Encodes an opaque data blob with COBS using 0 as the sentinel value. Returns
47/// the number of bytes the encoding took. Returns an error if the output buffer
48/// is too small.
49#[inline]
50pub fn encode(data: &[u8], encoded: &mut [u8]) -> Result<usize, EncodeError> {
51    let want = encode_buffer(data.len());
52    if encoded.len() < want {
53        return Err(EncodeError::BufferTooSmall {
54            have: encoded.len(),
55            want,
56        });
57    }
58    Ok(encode_unsafe(data, encoded))
59}
60
61/// Encodes an opaque data blob with COBS using 0 as the sentinel value. Returns
62/// the number of bytes the encoding took.
63///
64/// # Safety
65/// The caller must ensure `encoded` has at least `encode_buffer(data.len())` bytes.
66#[inline]
67pub fn encode_unsafe(data: &[u8], encoded: &mut [u8]) -> usize {
68    // The empty blob is always encoded as 0x01
69    if data.is_empty() {
70        encoded[0] = 0x01;
71        return 1;
72    }
73    // Sanity check in debug builds that the user called it correctly
74    debug_assert!(encoded.len() >= encode_buffer(data.len()));
75
76    // Start pushing the bytes into the output array, skipping each marker byte
77    // and backfilling it later
78    unsafe {
79        let mut marker_pos = 0usize;
80        let mut output_pos = 1usize;
81        let mut run_length = 1u8;
82
83        for &b in data {
84            // If the next byte is non-zero, append it to the output
85            if b > 0 {
86                *encoded.get_unchecked_mut(output_pos) = b;
87                output_pos += 1;
88                run_length += 1;
89
90                // If an entire chunk was non-zero, mark and start the next chunk
91                if run_length == 0xff {
92                    *encoded.get_unchecked_mut(marker_pos) = run_length;
93                    marker_pos = output_pos;
94                    output_pos += 1;
95                    run_length = 1;
96                }
97            } else {
98                // Next byte is zero, terminate the chunk and start the next chunk
99                *encoded.get_unchecked_mut(marker_pos) = run_length;
100                marker_pos = output_pos;
101                output_pos += 1;
102                run_length = 1;
103            }
104        }
105        // Terminate any unfinished chunk
106        let last_byte = *data.get_unchecked(data.len() - 1);
107        if run_length > 1 || last_byte == 0 {
108            *encoded.get_unchecked_mut(marker_pos) = run_length;
109        } else {
110            // Just finished at the chunk boundary, revert last open
111            output_pos -= 1;
112        }
113        // Return the number of bytes written to the output stream
114        output_pos
115    }
116}
117
118/// Decodes an opaque data blob with COBS using 0 as the sentinel value. Returns
119/// the number of bytes the decoding took. Returns an error if the output buffer
120/// is too small or if the input is malformed.
121#[inline]
122pub fn decode(data: &[u8], decoded: &mut [u8]) -> Result<usize, DecodeError> {
123    if data.is_empty() {
124        return Err(DecodeError::EmptyInput);
125    }
126    if data.len() > 1 {
127        let want = decode_buffer(data.len());
128        if decoded.len() < want {
129            return Err(DecodeError::BufferTooSmall {
130                have: decoded.len(),
131                want,
132            });
133        }
134    }
135    decode_unsafe(data, decoded)
136}
137
138/// Decodes an opaque data blob with COBS using 0 as the sentinel value. Returns
139/// the number of bytes the decoding took.
140///
141/// # Safety
142/// The caller must ensure `decoded` has at least `decode_buffer(data.len())` bytes.
143#[inline]
144pub fn decode_unsafe(data: &[u8], decoded: &mut [u8]) -> Result<usize, DecodeError> {
145    // The empty blob is not a valid COBS encoding
146    if data.is_empty() {
147        return Err(DecodeError::EmptyInput);
148    }
149    // The empty text is always encoded as 0x01
150    if data.len() == 1 && data[0] == 0x01 {
151        return Ok(0);
152    }
153    // Sanity check in debug builds that the user called it correctly
154    debug_assert!(decoded.len() >= decode_buffer(data.len()));
155
156    // Consume the input stream one chunk at a time
157    unsafe {
158        let mut output_pos = 0usize;
159        let mut i = 0usize;
160
161        while i < data.len() {
162            // Zero cannot be part of a COBS encoded stream
163            let marker = *data.get_unchecked(i);
164            if marker == 0 {
165                return Err(DecodeError::ZeroMarker { at: i });
166            }
167            i += 1;
168
169            // If the marker defines an overflowing chunk, abort
170            if i + (marker as usize) - 1 > data.len() {
171                return Err(DecodeError::ChunkOverflow {
172                    at: i - 1,
173                    marker,
174                    len: data.len(),
175                });
176            }
177            // Consume the entire chunk, ensuring there's no zero in it
178            for _ in 1..marker {
179                let b = *data.get_unchecked(i);
180                if b == 0 {
181                    return Err(DecodeError::ZeroBinary { at: i });
182                }
183                *decoded.get_unchecked_mut(output_pos) = b;
184                output_pos += 1;
185                i += 1;
186            }
187            // If we had a partial chunk, there must be a zero following
188            if i < data.len() && marker != 0xff {
189                *decoded.get_unchecked_mut(output_pos) = 0;
190                output_pos += 1;
191            }
192        }
193        Ok(output_pos)
194    }
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `payload` into `enc_buf`, decodes the encoded bytes into
    /// `dec_buf` and asserts that the original payload comes back unchanged.
    fn assert_roundtrip(payload: &[u8], enc_buf: &mut [u8], dec_buf: &mut [u8]) {
        let enc_len = encode(payload, enc_buf).unwrap();
        let dec_len = decode(&enc_buf[..enc_len], dec_buf).unwrap();
        assert_eq!(&dec_buf[..dec_len], payload);
    }

    // The empty payload must encode to the single byte 0x01 and decode back to
    // nothing, even with a zero-sized output buffer.
    #[test]
    fn test_roundtrip_empty() {
        let payload: [u8; 0] = [];

        let mut enc_buf = [0u8; 1];
        let enc_len = encode(&payload, &mut enc_buf).unwrap();
        assert_eq!(enc_len, 1);
        assert_eq!(enc_buf[0], 0x01);

        let mut dec_buf = [0u8; 0];
        let dec_len = decode(&enc_buf[..enc_len], &mut dec_buf).unwrap();
        assert_eq!(dec_len, 0);
    }

    // A short payload without any zero bytes; the buffers are sized in const
    // context through the sizing helpers.
    #[test]
    fn test_roundtrip_no_zeros() {
        let mut enc_buf = [0u8; encode_buffer(5)];
        let mut dec_buf = [0u8; decode_buffer(encode_buffer(5))];
        assert_roundtrip(&[1, 2, 3, 4, 5], &mut enc_buf, &mut dec_buf);
    }

    // Leading, interior and consecutive zero bytes.
    #[test]
    fn test_roundtrip_with_zeros() {
        let mut enc_buf = [0u8; encode_buffer(7)];
        let mut dec_buf = [0u8; decode_buffer(encode_buffer(7))];
        assert_roundtrip(&[0, 1, 0, 2, 0, 0, 3], &mut enc_buf, &mut dec_buf);
    }

    // Exactly one full chunk of non-zero bytes.
    #[test]
    fn test_roundtrip_254_nonzero() {
        let payload: Vec<u8> = (1..=254).collect();
        let mut enc_buf = vec![0u8; encode_buffer(254)];
        let mut dec_buf = vec![0u8; decode_buffer(enc_buf.len())];
        assert_roundtrip(&payload, &mut enc_buf, &mut dec_buf);
    }

    // One full chunk of non-zero bytes followed by a single extra byte.
    #[test]
    fn test_roundtrip_255_nonzero() {
        let payload: Vec<u8> = (1..=254).chain(std::iter::once(1)).collect();
        let mut enc_buf = vec![0u8; encode_buffer(255)];
        let mut dec_buf = vec![0u8; decode_buffer(enc_buf.len())];
        assert_roundtrip(&payload, &mut enc_buf, &mut dec_buf);
    }
}
257}