cobs_rs/
lib.rs

1//! A very minimal no_std [Consistent Overhead Byte
2//! Stuffing](https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing)
3//! library written in Rust. The COBS algorithm, and thus also this crate, provides
4//! an encoding for arbitrary data which removes any occurrence of a specific marker
5//! byte. This is mostly useful when we are transferring arbitrary data which
6//! is terminated with a null byte, and therefore we don't want our arbitrary data
//! buffer to contain any null bytes. In fact, this crate will automatically append the
//! marker byte at the end of any encoded buffer.
9//!
10//! ## Features
11//!
12//! The *cobs-rs* crate only provides two specific functions. Namely, the
13//! [`stuff`] and the [`unstuff`] function, which encode and decode respectively. This, together
14//! with the fact that the crate doesn't use the [`std`](https://doc.rust-lang.org/std/index.html),
15//! makes the crate perfect for embedded hardware. However, it can also be used outside of embedded
16//! systems.
17//!
18//! ## Usage
19//!
20//! Both the encode([`stuff`]) and decode([`unstuff`]) functions, use [const
21//! generics](https://blog.rust-lang.org/2021/02/26/const-generics-mvp-beta). This
22//! may make usage a bit counter-intuitive for people unfamiliar with this feature
23//! at first.
24//!
//! Something to take into account here is that the COBS algorithm will __at most__
//! add `2 + ((size of input buffer - 1) / 254)` (with integer division, for a
//! non-empty input buffer) bytes to the encoded buffer in size compared to the
//! input buffer: one leading offset byte, one terminating marker byte and one
//! extra offset byte for every full run of 254 bytes that is followed by more
//! data. This fact allows us to always reserve enough space for the output buffer.
//!
//! ### Encoding buffers
//!
//! Let us have a look at a small example of how to encode some data using the
//! [`stuff`] function.
//!
//! ```no_run
//! let data: [u8; 254] = [
//!     // ...snip
//! # 0; 254
//! ];
//!
//! // Our input buffer is 254 bytes long.
//! // Thus, we need to reserve 2 + (253 / 254) = 2 extra bytes
//! // for the encoded buffer.
//! let encoded: [u8; 256] = cobs_rs::stuff(data, 0x00);
//!
//! // We can also encode much larger buffers
//! let a_lot_of_data: [u8; 1337] = [
//!     // ...snip
//! # 0; 1337
//! ];
//!
//! // Our input buffer is 1337 bytes long.
//! // Thus, we need to reserve 2 + (1336 / 254) = 7 extra bytes
//! // for the encoded buffer.
//! let a_lot_of_output: [u8; 1344] = cobs_rs::stuff(a_lot_of_data, 0x00);
56//! ```
57//!
//! > **Note:** The output buffer type specifications are always necessary. The type
//! > specifications for the input data aren't necessary most of the time.
60//!
61//! ### Decoding buffers
62//!
63//! Now, let us look at an example of how to decode data using the [`unstuff`] function.
64//!
//! It is generally a good idea to reserve `size of encoded buffer - 2` bytes for
//! the decoded buffer. With this rule, we will always have enough space for the
//! decoded buffer. Next to the decoded buffer, the [`unstuff`] function will
//! also return the actual filled size of the buffer.
69//!
70//! ```no_run
71//! // We are given some encoded data buffer
72//! let encoded_data: [u8; 256] = [
73//!     //... snip
74//! # 0; 256
75//! ];
76//!
77//! // We reserve 256 - 2 = 254 bytes for the decoded buffer.
78//! let (decoded_data, decoded_data_length): ([u8; 254], usize) =
79//!     cobs_rs::unstuff(encoded_data, 0x00);
80//!
81//! // We can also decode bigger buffers
82//! let a_lot_of_encoded_data: [u8; 1344] = [
83//!     //... snip
84//! # 0; 1344
85//! ];
86//!
87//! // We reserve 1344 - 2 = 1342 bytes for the decoded buffer.
//! let (a_lot_of_decoded_data, a_lot_of_decoded_data_length): ([u8; 1342], usize) =
//!     cobs_rs::unstuff(a_lot_of_encoded_data, 0x00);
90//! ```
91//!
//! > **Note:** The decoded buffer type specifications are always necessary. The
//! > type specifications for the encoded data aren't necessary most of the time.
94//!
95//! ## License
96//!
97//! Licensed under a __MIT__ license.
98
99#![no_std]
100#![warn(missing_docs)]
101
102use core::convert::TryInto;
103
/// Bookkeeping for the most recently placed offset byte while encoding.
struct MarkerInfo {
    /// Output-buffer position of the offset byte that still has to receive its distance value.
    index: usize,
    /// Output-buffer position `0xff` past `index`; `stuff` inserts an extra offset byte when
    /// it reaches this position.
    points_to: usize,
}

impl MarkerInfo {
    /// Finalizes the pending offset byte and starts tracking a new one at `new_index`.
    ///
    /// The distance `new_index - self.index` is stored at `out_buffer[self.index]`. Afterwards
    /// `index` becomes `new_index` and `points_to` becomes `new_index + 0xff`.
    ///
    /// Panics if the distance does not fit in a `u8` or if `self.index` lies outside
    /// `out_buffer`.
    fn adjust_accordingly<const SIZE: usize>(
        &mut self,
        out_buffer: &mut [u8; SIZE],
        new_index: usize,
    ) {
        // The distance is computed (and range-checked) before the buffer is touched.
        let distance: u8 = (new_index - self.index).try_into().unwrap();
        out_buffer[self.index] = distance;

        *self = MarkerInfo {
            index: new_index,
            points_to: new_index + 0xff,
        };
    }
}
121
122/// Takes an input buffer and a marker value and COBS-encodes it to an output buffer.
123///
124/// Removes all occurrences of the marker value and adds one occurrence at the end. The returned
125/// buffer should at least be 2 greater than the input buffer and for roughly 256 bytes there is a
126/// possibility for an extra byte in the output buffer. All left-over space will and the end of
127/// the buffer and will be filled with the marker value.
128///
129/// # Examples
130///
131/// ## Stuffing arbitrary data
132///
133/// ```
134/// let transfer: [u8; 256] = cobs_rs::stuff(
135///     *b"Hi everyone! This is a pretty nifty example.",
136///     b'i'
137/// );
138///
139/// // Now the data won't contain 'i's anymore except for the terminator byte.
140/// # assert!(transfer[..45].into_iter().all(|byte| *byte != b'i'));
141/// ```
142///
143/// ## Making sure there are no null bytes anymore
144///
145/// ```
146/// let data = [
147///     // ...snip
148/// #       1
149/// ];
150///
151/// let transfer: [u8; 256] = cobs_rs::stuff(data, 0x00);
152///
153/// // Now the data won't contain null bytes anymore except for the terminator byte.
154/// ```
155///
156/// # Panics
157///
158/// This function panics when the output buffer doesn't have enough space to fill the data from the
159/// input buffer with.
160pub fn stuff<const INPUT: usize, const OUTPUT: usize>(
161    buff: [u8; INPUT],
162    marker: u8,
163) -> [u8; OUTPUT] {
164    let mut output_buffer: [u8; OUTPUT] = [marker; OUTPUT];
165
166    // Keep track of where the last marker was.
167    // This always has one in the beginning, which is the overhead byte.
168    let mut last_marker = MarkerInfo {
169        index: 0,
170        points_to: 0xff,
171    };
172
173    // Every time we set additional overhead marker, we should increase the offset.
174    // This way we keep track what the relationship is between the input array indices and the
175    // output array indices.
176    let mut overhead_bytes = 1;
177
178    // Loop through all the input bytes.
179    for i in 0..INPUT {
180        // Fetch the value of the input byte array.
181        let value = buff[i];
182
183        if last_marker.points_to == (overhead_bytes + i) {
184            // Update the last marker and set the marker info to this new overhead byte.
185            last_marker.adjust_accordingly(&mut output_buffer, overhead_bytes + i);
186
187            // Say that we have another overhead byte.
188            overhead_bytes += 1;
189        }
190
191        // If the current input value is a marker, adjust the previous marker accordingly and skip
192        // the setting of the value, although it doesn't really matter.
193        if value == marker {
194            // Update the last marker value and info to this new marker.
195            last_marker.adjust_accordingly(&mut output_buffer, overhead_bytes + i);
196
197            continue;
198        }
199
200        // Update the output buffer value
201        output_buffer[overhead_bytes + i] = value;
202    }
203
204    // For the last byte we update the previous marker.
205    output_buffer[last_marker.index] = (INPUT + overhead_bytes - last_marker.index)
206        .try_into()
207        .unwrap();
208
209    if marker != 0x00 {
210        for i in 0..(OUTPUT - 1) {
211            output_buffer[i] ^= marker;
212        }
213    }
214
215    output_buffer
216}
217
/// Takes an input buffer and a marker value and COBS-decodes it to an output buffer.
///
/// The first byte is always read as an offset byte. From the second byte on, the function
/// removes all overhead bytes, inserts the marker where appropriate and __stops immediately__
/// when a marker value is found. The decoded data never needs more than `INPUT - 2` bytes. All
/// left-over space is at the end of the output buffer and is filled with `0x00` bytes. The
/// returned tuple contains both the decoded buffer and the actual filled length of that buffer.
///
/// # Non-zero markers
///
/// When `marker` is not `0x00`, every input byte except the very last one is XOR-ed with
/// `marker` before decoding, mirroring what [`stuff`] does. After that step an offset byte
/// whose value equals `marker` cannot be told apart from the terminator, so decoding stops
/// there.
///
/// # Examples
///
/// ```no_run
/// let transferred_data: [u8; 258] = [
///     // ... snip
/// # 0; 258
/// ];
///
/// // We convert the COBS-encoded transferred_data to the plain data
/// // using the unstuff function.
/// let (plain_data, plain_data_length): ([u8; 256], usize) =
///     cobs_rs::unstuff(transferred_data, 0x00);
///
/// // ... snip
/// ```
///
/// # Panics
///
/// If we don't have a marker value in the encoded data buffer after the first byte, the
/// function panics with `No marker value found!`.
///
/// This function also panics when the output buffer doesn't have enough space to fill the data
/// from the input buffer with. This never happens if we reserve the maximum possible memory for
/// the output, that being two less bytes than the input buffer.
///
/// A malformed frame containing an offset byte of zero causes an arithmetic overflow, which
/// panics in builds with overflow checks enabled.
pub fn unstuff<const INPUT: usize, const OUTPUT: usize>(
    mut buff: [u8; INPUT],
    marker: u8,
) -> ([u8; OUTPUT], usize) {
    let mut output_buffer = [0; OUTPUT];

    // Undo the XOR that `stuff` applies for non-zero markers. The last byte is left untouched,
    // and an empty buffer has nothing to undo.
    if marker != 0x00 {
        if let Some((_, body)) = buff.split_last_mut() {
            for byte in body {
                *byte ^= marker;
            }
        }
    }

    // The first byte is the leading offset byte; without it there cannot be a marker either.
    let first = match buff.first() {
        Some(&byte) => byte,
        None => panic!("No marker value found!"),
    };

    // Distance left until the next offset byte. One is subtracted because the loop starts at
    // index 1 instead of 0.
    let mut until_next_marker = first - 1;
    // An offset of 0xff means the byte it points at is a pure overhead byte and not a
    // replaced marker.
    let mut next_is_overhead_byte = first == 0xff;

    // Number of overhead bytes seen so far; input index minus this is the output index.
    let mut overhead_bytes = 1;

    for i in 1..INPUT {
        let value = buff[i];

        // The terminator ends the frame. Everything before it, minus the overhead bytes, is
        // decoded data, so the length is simply `i - overhead_bytes` (0 for an empty frame).
        if value == marker {
            return (output_buffer, i - overhead_bytes);
        }

        if until_next_marker == 0 {
            // We are on an offset byte: its value is the distance to the next one.
            until_next_marker = value;

            if next_is_overhead_byte {
                // Pure overhead byte: it carries no data, only shifts the output index.
                overhead_bytes += 1;
            } else {
                // This offset byte stands in for a marker that was part of the data.
                output_buffer[i - overhead_bytes] = marker;
            }

            next_is_overhead_byte = until_next_marker == 0xff;
        } else {
            // Ordinary data byte, copied as is.
            output_buffer[i - overhead_bytes] = value;
        }

        until_next_marker -= 1;
    }

    // The whole buffer was scanned without meeting a terminator.
    panic!("No marker value found!");
}
317
#[cfg(test)]
mod tests {
    use super::*;
    use core::ops::Range;

    /// A pair of plain data (`N` bytes) and its expected encoding with marker `0x00`
    /// (`M` bytes, terminator included).
    #[derive(Debug)]
    struct TestVector<const N: usize, const M: usize> {
        unencoded_data: [u8; N],
        encoded_data: [u8; M],
    }

    impl<const N: usize, const M: usize> TestVector<N, M> {
        /// Bundles the plain data with its expected encoding.
        const fn new(unencoded_data: [u8; N], encoded_data: [u8; M]) -> Self {
            Self {
                unencoded_data,
                encoded_data,
            }
        }

        /// Asserts that encoding the plain data yields exactly the expected encoding.
        fn assert_stuff(&self) {
            assert_eq!(stuff::<N, M>(self.unencoded_data, 0x00), self.encoded_data);
        }

        /// Asserts that decoding the expected encoding yields the plain data and its length.
        fn assert_unstuff(&self) {
            assert_eq!(
                unstuff::<M, N>(self.encoded_data, 0x00),
                (self.unencoded_data, self.unencoded_data.len())
            );
        }

        /// Asserts that encoding followed by decoding returns the plain data and its length.
        fn assert_stuff_then_unstuff(&self) {
            assert_eq!(
                unstuff::<M, N>(stuff(self.unencoded_data, 0x00), 0x00),
                (self.unencoded_data, self.unencoded_data.len())
            );
        }

        /// Asserts that decoding followed by encoding returns the expected encoding.
        fn assert_unstuff_then_stuff(&self) {
            assert_eq!(
                stuff::<N, M>(unstuff(self.encoded_data, 0x00).0, 0x00),
                self.encoded_data
            );
        }
    }

    /// Overwrites `initial` with the successive values of `range`, starting at `start_index`,
    /// and returns the result. Elements outside the written span keep their initial value.
    fn get_range<const N: usize>(
        mut initial: [u8; N],
        start_index: usize,
        range: Range<u8>,
    ) -> [u8; N] {
        for (index, value) in range.enumerate() {
            initial[index + start_index] = value;
        }

        initial
    }

    // A single zero byte.
    const TV_1: TestVector<1, 3> = TestVector::new([0x00], [0x01, 0x01, 0x00]);
    // Two consecutive zero bytes.
    const TV_2: TestVector<2, 4> = TestVector::new([0x00, 0x00], [0x01, 0x01, 0x01, 0x00]);
    // A zero byte in the middle of the data.
    const TV_3: TestVector<4, 6> = TestVector::new(
        [0x11, 0x22, 0x00, 0x33],
        [0x03, 0x11, 0x22, 0x02, 0x33, 0x00],
    );
    // No zero byte at all.
    const TV_4: TestVector<4, 6> = TestVector::new(
        [0x11, 0x22, 0x33, 0x44],
        [0x05, 0x11, 0x22, 0x33, 0x44, 0x00],
    );
    // A run of zero bytes at the end of the data.
    const TV_5: TestVector<4, 6> = TestVector::new(
        [0x11, 0x00, 0x00, 0x00],
        [0x02, 0x11, 0x01, 0x01, 0x01, 0x00],
    );
    // 254 non-zero bytes (0x01..=0xfe): fits behind a single 0xff offset byte.
    fn tv_6() -> TestVector<254, 256> {
        TestVector::new(
            get_range([0; 254], 0, 0x01..0xff),
            get_range(
                {
                    let mut arr = [0; 256];
                    arr[0] = 0xff;
                    arr
                },
                1,
                0x01..0xff,
            ),
        )
    }
    // A leading zero byte followed by 254 non-zero bytes (0x00..=0xfe).
    fn tv_7() -> TestVector<255, 257> {
        TestVector::new(
            get_range([0; 255], 0, 0x00..0xff),
            get_range(
                {
                    let mut arr = [0; 257];
                    arr[0] = 0x01;
                    arr[1] = 0xff;
                    arr
                },
                2,
                0x01..0xff,
            ),
        )
    }

    // 255 non-zero bytes (0x01..=0xfe, then 0xff): one byte more than a single offset byte can
    // cover, so a second offset byte is expected at output index 255.
    fn tv_8() -> TestVector<255, 258> {
        TestVector::new(
            get_range([0xff; 255], 0, 0x01..0xff),
            get_range(
                {
                    let mut arr = [0; 258];
                    arr[0] = 0xff;
                    arr[255] = 0x02;
                    arr[256] = 0xff;
                    arr
                },
                1,
                0x01..0xff,
            ),
        )
    }

    // 254 non-zero bytes (0x02..=0xfe, then 0xff) directly followed by a zero byte.
    fn tv_9() -> TestVector<255, 258> {
        TestVector::new(
            get_range(
                {
                    let mut arr = [0xff; 255];
                    arr[254] = 0;
                    arr
                },
                0,
                0x02..0xff,
            ),
            get_range(
                {
                    let mut arr = [0; 258];
                    arr[0] = 0xff;
                    arr[254] = 0xff;
                    arr[255] = 0x01;
                    arr[256] = 0x01;
                    arr
                },
                1,
                0x02..0xff,
            ),
        )
    }

    // 253 non-zero bytes (0x03..=0xfe, then 0xff), a zero byte, then a trailing 0x01.
    fn tv_10() -> TestVector<255, 257> {
        TestVector::new(
            get_range(
                {
                    let mut arr = [0xff; 255];
                    arr[253] = 0x00;
                    arr[254] = 0x01;
                    arr
                },
                0,
                0x03..0xff,
            ),
            get_range(
                {
                    let mut arr = [0; 257];
                    arr[0] = 0xfe;
                    arr[253] = 0xff;
                    arr[254] = 0x02;
                    arr[255] = 0x01;
                    arr
                },
                1,
                0x03..0xff,
            ),
        )
    }

    // Encoding every test vector must produce its expected encoding.
    #[test]
    fn stuff_test_vectors() {
        TV_1.assert_stuff();
        TV_2.assert_stuff();
        TV_3.assert_stuff();
        TV_4.assert_stuff();
        TV_5.assert_stuff();
        tv_6().assert_stuff();
        tv_7().assert_stuff();
        tv_8().assert_stuff();
        tv_9().assert_stuff();
        tv_10().assert_stuff();
    }

    // Decoding every test vector must produce its plain data and length.
    #[test]
    fn unstuff_test_vectors() {
        TV_1.assert_unstuff();
        TV_2.assert_unstuff();
        TV_3.assert_unstuff();
        TV_4.assert_unstuff();
        TV_5.assert_unstuff();
        tv_6().assert_unstuff();
        tv_7().assert_unstuff();
        tv_8().assert_unstuff();
        tv_9().assert_unstuff();
        tv_10().assert_unstuff();

        // Decoding into an output buffer larger than needed: the unused tail stays 0x00 and
        // the returned length reports only the decoded bytes.
        assert_eq!(
            unstuff([0x01, 0x01, 0x00], 0x00),
            ([0x00, 0x00, 0x00, 0x00], 1)
        );
        assert_eq!(
            unstuff([0x02, 0x01, 0x00], 0x00),
            ([0x01, 0x00, 0x00, 0x00], 1)
        );
    }

    // `stuff` and `unstuff` must undo each other, in both orders, for every test vector.
    #[test]
    fn inverses() {
        TV_1.assert_stuff_then_unstuff();
        TV_2.assert_stuff_then_unstuff();
        TV_3.assert_stuff_then_unstuff();
        TV_4.assert_stuff_then_unstuff();
        TV_5.assert_stuff_then_unstuff();
        tv_6().assert_stuff_then_unstuff();
        tv_7().assert_stuff_then_unstuff();
        tv_8().assert_stuff_then_unstuff();
        tv_9().assert_stuff_then_unstuff();
        tv_10().assert_stuff_then_unstuff();

        TV_1.assert_unstuff_then_stuff();
        TV_2.assert_unstuff_then_stuff();
        TV_3.assert_unstuff_then_stuff();
        TV_4.assert_unstuff_then_stuff();
        TV_5.assert_unstuff_then_stuff();
        tv_6().assert_unstuff_then_stuff();
        tv_7().assert_unstuff_then_stuff();
        tv_8().assert_unstuff_then_stuff();
        tv_9().assert_unstuff_then_stuff();
        tv_10().assert_unstuff_then_stuff();
    }

    // Issue #1: https://github.com/coastalwhite/cobs-rs/issues/1
    // Encodes with a non-zero marker (`b'A'`) data that contains the marker itself.
    #[test]
    fn non_zero_byte() {
        let transfer: [u8; 130] = stuff(
            *b"----------------------------------------------------------------A----------------------------------------------------------------",
            b'A'
        );

        // No byte of the encoded output may be equal to the marker 'A'.
        assert!(transfer.iter().all(|byte| *byte != b'A'));
    }
}
562}