cobs 0.5.1

This is an implementation of the Consistent Overhead Byte Stuffing (COBS) algorithm. COBS is an algorithm for transforming a message into an encoding where a specific value (the "sentinel" value) is not used. This value can then be used to mark frame boundaries in a serial communication channel.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
/// Error reported by the encoding routines in this module when the destination buffer cannot
/// hold the encoded output.
///
/// It carries no payload; the streaming encoder returns it as soon as a write would land outside
/// of the destination slice.
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
#[error("out of bounds error during encoding")]
pub struct DestBufTooSmallError;

/// The [`EncoderState`] is used to track the current state of a streaming encoder. This struct
/// does not contain the output buffer (or a reference to one), and can be used when streaming the
/// encoded output to a custom data type.
///
/// The state machine only reports *what* has to happen to the output (see [`PushResult`]);
/// applying those changes to an actual buffer is the caller's job.
///
/// **IMPORTANT NOTE**: When implementing a custom streaming encoder,
/// the [`EncoderState`] state machine assumes that the output buffer
/// **ALREADY** contains a single placeholder byte, and no other bytes.
/// This placeholder byte will be later modified with the first distance
/// to the next header/zero byte.
#[derive(Clone, Debug)]
pub struct EncoderState {
    // Index in the output of the code (header) byte that is currently open, i.e. the
    // placeholder that will be overwritten once the running block is closed.
    code_idx: usize,
    // Value that will be written into the open code byte. Starts at 1 and grows by one for
    // every non-zero byte pushed; the block is closed when it reaches 0xFF.
    num_bt_sent: u8,
    // Distance from `code_idx` to the position of the next code byte. It is added to
    // `code_idx` whenever a block is closed.
    offset_idx: u8,
}

/// [`PushResult`] is used to represent the changes to an (encoded)
/// output data buffer when an unencoded byte is pushed into [`EncoderState`].
///
/// Every variant describes one step that the owner of the output buffer has to carry out.
/// Indices are absolute positions counted from the start of the encoded message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PushResult {
    /// The returned byte should be placed at the current end of the data buffer
    AddSingle(u8),

    /// The byte at the given index should be replaced with the given byte.
    /// Additionally, a placeholder byte should be inserted at the current
    /// end of the output buffer to be later modified
    ///
    /// Tuple layout: `(index of the byte to overwrite, value to write there)`.
    ModifyFromStartAndSkip((usize, u8)),

    /// The byte at the given index should be replaced with the given byte.
    /// Then, the last u8 in this tuple should be inserted at the end of the
    /// current output buffer. Finally, a placeholder byte should be inserted at
    /// the current end of the output buffer to be later modified if the encoding process is
    /// not done yet.
    ///
    /// Tuple layout: `(index of the byte to overwrite, value to write there, byte to append)`.
    ModifyFromStartAndPushAndSkip((usize, u8, u8)),
}

impl Default for EncoderState {
    /// Create a default initial state representation for a COBS encoder
    fn default() -> Self {
        Self {
            code_idx: 0,
            num_bt_sent: 1,
            offset_idx: 1,
        }
    }
}

impl EncoderState {
    /// Feed one unencoded byte into the state machine and report how the output must change.
    ///
    /// A zero byte closes the running block. A non-zero byte is appended to it, and closes it as
    /// well once the block's code value reaches `0xFF`.
    pub fn push(&mut self, data: u8) -> PushResult {
        match data {
            0 => PushResult::ModifyFromStartAndSkip(self.close_block()),
            byte => {
                self.num_bt_sent += 1;
                self.offset_idx += 1;

                if self.num_bt_sent < 0xFF {
                    PushResult::AddSingle(byte)
                } else {
                    // The block is full: patch its code byte, emit the data byte and start over.
                    let (patch_idx, patch_val) = self.close_block();
                    PushResult::ModifyFromStartAndPushAndSkip((patch_idx, patch_val, byte))
                }
            }
        }
    }

    /// Close the running block.
    ///
    /// Returns the position and final value of its code byte, then moves the code index forward
    /// by the accumulated offset and resets both counters to 1.
    fn close_block(&mut self) -> (usize, u8) {
        let closed = (self.code_idx, self.num_bt_sent);
        self.code_idx += usize::from(self.offset_idx);
        self.num_bt_sent = 1;
        self.offset_idx = 1;
        closed
    }

    /// Finish encoding a single message.
    ///
    /// The returned pair is `(index, value)`: the byte at `index` should be replaced with
    /// `value`. The sentinel value (typically 0u8) must then be inserted at the current end of
    /// the output buffer, serving as a framing byte.
    pub fn finalize(self) -> (usize, u8) {
        let Self {
            code_idx,
            num_bt_sent,
            ..
        } = self;
        (code_idx, num_bt_sent)
    }
}

/// The [`CobsEncoder`] type is used to encode a stream of bytes to a given mutable output slice.
///
/// This is often useful when heap data structures are not available, or when not all message bytes
/// are received at a single point in time.
#[derive(Debug)]
pub struct CobsEncoder<'a> {
    // Destination slice that receives the encoded bytes.
    dest: &'a mut [u8],
    // Index of the next position in `dest` to write to; also the value returned by `finalize`.
    dest_idx: usize,
    // Buffer-independent COBS state machine.
    state: EncoderState,
    // Set when the last pushed byte completed a full block: the slot for the next code byte is
    // only accounted for if more data is pushed afterwards.
    might_be_done: bool,
}

impl<'a> CobsEncoder<'a> {
    /// Create a new streaming COBS encoder that writes into `out_buf`.
    ///
    /// Index 0 of `out_buf` is treated as the placeholder for the first code byte, so data bytes
    /// are written starting at index 1.
    pub fn new(out_buf: &'a mut [u8]) -> CobsEncoder<'a> {
        CobsEncoder {
            dest: out_buf,
            dest_idx: 1,
            state: EncoderState::default(),
            might_be_done: false,
        }
    }

    /// Push a slice of data to be encoded.
    ///
    /// This can be called any number of times before [`CobsEncoder::finalize`]. Pushing an empty
    /// slice has no effect.
    ///
    /// # Errors
    ///
    /// Returns [`DestBufTooSmallError`] as soon as a byte does not fit into the destination
    /// buffer. This includes the code byte that necessarily follows every zero byte of `data`.
    /// Bytes written before the error was detected remain in the buffer.
    pub fn push(&mut self, data: &[u8]) -> Result<(), DestBufTooSmallError> {
        // TODO: could probably check if this would fit without
        // iterating through all data

        // There was the possibility that the encoding process is done, but more data is pushed
        // instead of a `finalize` call, so the destination index needs to be incremented.
        // An empty slice does not count as more data: consuming the flag for it would make the
        // output one byte longer than necessary.
        if self.might_be_done && !data.is_empty() {
            self.dest_idx += 1;
            self.might_be_done = false;
        }
        for (slice_idx, val) in data.iter().enumerate() {
            use PushResult::*;
            match self.state.push(*val) {
                AddSingle(y) => {
                    *self
                        .dest
                        .get_mut(self.dest_idx)
                        .ok_or(DestBufTooSmallError)? = y;
                }
                ModifyFromStartAndSkip((idx, mval)) => {
                    *self.dest.get_mut(idx).ok_or(DestBufTooSmallError)? = mval;
                    // A zero byte is always followed by another code byte, which lives at the
                    // current end of the output. Without this check `finalize` would silently
                    // skip that byte and report a length larger than the buffer.
                    if self.dest_idx >= self.dest.len() {
                        return Err(DestBufTooSmallError);
                    }
                }
                ModifyFromStartAndPushAndSkip((idx, mval, nval1)) => {
                    *self.dest.get_mut(idx).ok_or(DestBufTooSmallError)? = mval;
                    *self
                        .dest
                        .get_mut(self.dest_idx)
                        .ok_or(DestBufTooSmallError)? = nval1;
                    // Do not increase the index if there is the possibility that we are finished.
                    if slice_idx == data.len() - 1 {
                        // If push is called again, the index will be incremented. If finalize
                        // is called, there is no need to increment the index.
                        self.might_be_done = true;
                    } else {
                        self.dest_idx += 1;
                    }
                }
            }

            // All branches above require advancing the pointer at least once
            self.dest_idx += 1;
        }

        Ok(())
    }

    /// Complete encoding of the output message. Does NOT terminate the message with the sentinel
    /// value.
    ///
    /// # Returns
    ///
    /// The length of the encoded message, i.e. the number of bytes at the start of the
    /// destination buffer that belong to it.
    pub fn finalize(self) -> usize {
        // Get the last index that needs to be fixed. `dest_idx` starts at 1 and only ever
        // grows, so there is no "nothing written yet" special case to handle here.
        let (idx, mval) = self.state.finalize();

        // If the current code index is outside of the destination slice,
        // we do not need to write it out. This happens when the message ended exactly on a
        // full block and the buffer has no room for a further (unneeded) code byte.
        if let Some(i) = self.dest.get_mut(idx) {
            *i = mval;
        }

        self.dest_idx
    }
}

/// Encodes the `source` buffer into the `dest` buffer.
///
/// This function assumes the typical sentinel value of 0, but does not terminate the encoded
/// message with the sentinel value. This should be done by the caller to ensure proper framing.
///
/// # Returns
///
/// The number of bytes written to in the `dest` buffer.
///
/// # Panics
///
/// This function will panic if the `dest` buffer is not large enough for the
/// encoded message. You can calculate the size the `dest` buffer needs to be with
/// the [crate::max_encoding_length] function.
pub fn encode(source: &[u8], dest: &mut [u8]) -> usize {
    let mut enc = CobsEncoder::new(dest);
    enc.push(source).unwrap();
    enc.finalize()
}

/// COBS-encodes `source` into `dest` and frames the result with the default sentinel value 0 on
/// both sides.
///
/// # Returns
///
/// The number of bytes written to in the `dest` buffer, both sentinel bytes included.
///
/// # Panics
///
/// Panics if `dest` is shorter than two bytes, if the encoder reports that the buffer is too
/// small, or if there is no room left for the trailing sentinel. A sufficiently large `dest` has
/// the length returned by [crate::max_encoding_length] plus 2.
pub fn encode_including_sentinels(source: &[u8], dest: &mut [u8]) -> usize {
    assert!(dest.len() >= 2, "destination buffer too small");

    // Leading sentinel, followed by the encoded message itself.
    dest[0] = 0;
    let payload_len = {
        let mut encoder = CobsEncoder::new(&mut dest[1..]);
        encoder.push(source).unwrap();
        encoder.finalize()
    };

    // Trailing sentinel goes directly behind the encoded message.
    let trailing_idx = payload_len + 1;
    dest[trailing_idx] = 0;
    trailing_idx + 1
}

/// Fallible counterpart of [encode]: COBS-encodes `source` into `dest` with the usual sentinel
/// value of 0.
///
/// No sentinel byte is appended to the encoded message; the caller has to add it to get a
/// properly framed message.
///
/// # Returns
///
/// The number of bytes written to in the `dest` buffer.
///
/// # Errors
///
/// A [`DestBufTooSmallError`] is returned if the encoder runs out of room in `dest`.
pub fn try_encode(source: &[u8], dest: &mut [u8]) -> Result<usize, DestBufTooSmallError> {
    let mut encoder = CobsEncoder::new(dest);
    match encoder.push(source) {
        Ok(()) => Ok(encoder.finalize()),
        Err(too_small) => Err(too_small),
    }
}

/// Attempts to encode the `source` buffer into the `dest` buffer, including the default sentinel
/// values 0 around the encoded frame.
///
/// # Returns
///
/// The number of bytes written to in the `dest` buffer, both sentinel bytes included.
///
/// # Errors
///
/// Returns [`DestBufTooSmallError`] if the destination buffer does not have enough room for the
/// leading sentinel, the encoded message and the trailing sentinel. This function never panics
/// on a short buffer.
pub fn try_encode_including_sentinels(
    source: &[u8],
    dest: &mut [u8],
) -> Result<usize, DestBufTooSmallError> {
    if dest.len() < 2 {
        return Err(DestBufTooSmallError);
    }
    dest[0] = 0;
    let mut enc = CobsEncoder::new(&mut dest[1..]);
    enc.push(source)?;
    let encoded_len = enc.finalize();
    // The encoded message may fill the buffer completely (e.g. an empty `source` with a two-byte
    // `dest`), so the slot for the trailing sentinel has to be bounds-checked as well.
    *dest
        .get_mut(encoded_len + 1)
        .ok_or(DestBufTooSmallError)? = 0;
    Ok(encoded_len + 2)
}

/// COBS-encodes `source` into `dest` so that an arbitrary `sentinel` value never occurs in the
/// output.
///
/// The message is first encoded with the typical sentinel value of 0 via [encode]. Every byte of
/// the encoded message is then XOR-ed with `sentinel`, which guarantees that the chosen sentinel
/// value does not show up in the result. See the paper "Consistent Overhead Byte Stuffing" for
/// details.
///
/// The encoded message is not terminated with the sentinel value. This should be done by the
/// caller to ensure proper framing.
///
/// # Returns
///
/// The number of bytes written to in the `dest` buffer.
pub fn encode_with_sentinel(source: &[u8], dest: &mut [u8], sentinel: u8) -> usize {
    let written = encode(source, dest);
    dest[..written]
        .iter_mut()
        .for_each(|byte| *byte ^= sentinel);
    written
}

#[cfg(feature = "alloc")]
/// Encodes the `source` buffer into a freshly allocated vector by means of the [encode]
/// function.
///
/// The vector is allocated with [crate::max_encoding_length] bytes and shortened to the length
/// reported by [encode] afterwards.
pub fn encode_vec(source: &[u8]) -> alloc::vec::Vec<u8> {
    let worst_case_len = crate::max_encoding_length(source.len());
    let mut buffer = alloc::vec![0; worst_case_len];
    let used = encode(source, buffer.as_mut_slice());
    buffer.truncate(used);
    buffer
}

#[cfg(feature = "alloc")]
/// Encodes the `source` buffer into a vector, using the [encode_including_sentinels] function,
/// so that the encoded frame is surrounded by the sentinel value 0.
///
/// The returned vector contains exactly the leading sentinel, the encoded message and the
/// trailing sentinel.
pub fn encode_vec_including_sentinels(source: &[u8]) -> alloc::vec::Vec<u8> {
    let mut encoded = alloc::vec![0; crate::max_encoding_length(source.len()) + 2];
    // The returned length already accounts for both sentinel bytes, so it must not be enlarged
    // again: doing so would keep stray bytes whenever the encoding is shorter than the worst
    // case the vector was sized for.
    let framed_len = encode_including_sentinels(source, &mut encoded);
    encoded.truncate(framed_len);
    encoded
}

#[cfg(feature = "alloc")]
/// Encodes the `source` buffer into a freshly allocated vector with an arbitrary sentinel value
/// by means of the [encode_with_sentinel] function.
///
/// The vector is allocated with [crate::max_encoding_length] bytes and shortened to the length
/// reported by [encode_with_sentinel] afterwards.
pub fn encode_vec_with_sentinel(source: &[u8], sentinel: u8) -> alloc::vec::Vec<u8> {
    let worst_case_len = crate::max_encoding_length(source.len());
    let mut buffer = alloc::vec![0; worst_case_len];
    let used = encode_with_sentinel(source, buffer.as_mut_slice(), sentinel);
    buffer.truncate(used);
    buffer
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        decode_vec,
        tests::{test_decode_in_place, test_encode_decode_free_functions},
    };

    #[test]
    fn test_encode_0() {
        // An empty input is encoded as a single code byte with the value 0x01.
        let mut output = [0xFFu8; 16];
        let used = encode(&[], &mut output);
        assert_eq!(used, 1);
        assert_eq!(output[0], 0x01);
    }

    /// Runs the shared encode/decode checks for one `source`/`encoded` pair.
    fn test_pair(source: &[u8], encoded: &[u8]) {
        test_encode_decode_free_functions(source, encoded);
        test_decode_in_place(source, encoded);
    }

    #[test]
    fn test_encode_1() {
        test_pair(&[10, 11, 0, 12], &[3, 10, 11, 2, 12])
    }

    #[test]
    fn test_encode_empty() {
        test_pair(&[], &[1])
    }

    #[test]
    fn test_encode_2() {
        test_pair(&[0, 0, 1, 0], &[1, 1, 2, 1, 1])
    }

    #[test]
    fn test_encode_3() {
        test_pair(&[255, 0], &[2, 255, 1])
    }

    #[test]
    fn test_encode_4() {
        test_pair(&[1], &[2, 1])
    }

    #[test]
    fn encode_target_buf_too_small() {
        let source = &[10, 11, 0, 12];
        let expected = &[3, 10, 11, 2, 12];
        // Every buffer shorter than the expected encoding must be rejected.
        for len in 0..expected.len() {
            let mut dest = alloc::vec![0; len];
            assert_eq!(try_encode(source, &mut dest), Err(DestBufTooSmallError));
        }
    }

    #[test]
    fn try_encode_with_sentinels() {
        let source = &[10, 11, 0, 12];
        let expected = &[0, 3, 10, 11, 2, 12, 0];
        let mut dest = alloc::vec![0; expected.len()];
        let encoded_len = try_encode_including_sentinels(source, &mut dest).unwrap();
        assert_eq!(encoded_len, expected.len());
        assert_eq!(dest[0], 0);
        assert_eq!(dest[expected.len() - 1], 0);
        assert_eq!(decode_vec(&dest).unwrap(), source);
    }

    #[test]
    fn test_encoding_including_sentinels() {
        let data = [1, 2, 3];
        let encoded = encode_vec_including_sentinels(&data);
        assert_eq!(*encoded.first().unwrap(), 0);
        assert_eq!(*encoded.last().unwrap(), 0);
        // Decoding has to work with both sentinels, with only the trailing one, and with none.
        let data_decoded = decode_vec(&encoded).unwrap();
        assert_eq!(data_decoded, data);
        let data_decoded = decode_vec(&encoded[1..]).unwrap();
        assert_eq!(data_decoded, data);
        let data_decoded = decode_vec(&encoded[1..encoded.len() - 1]).unwrap();
        assert_eq!(data_decoded, data);
    }

    #[test]
    #[should_panic]
    fn encode_target_buf_too_small_panicking() {
        let source = &[10, 11, 0, 12];
        let expected = &[3, 10, 11, 2, 12];
        encode(source, &mut alloc::vec![0; expected.len() - 1]);
    }
}