Skip to main content

packbits_rle/
lib.rs

1#![doc = include_str!("../README.md")]
2#![feature(seek_stream_len)]
3use std::{
4    io::{self, ErrorKind},
5    iter,
6};
7
8mod reader;
9
10pub use reader::Reader;
11
12#[derive(Debug, thiserror::Error)]
13/// General error used in packbit extraction
14pub enum Error {
15    /// Input does not provide enough data to fill the requested size
16    #[error("Input does not contain enough packbits data to fill the buffer")]
17    NotEnoughInputData,
18
19    /// The output buffer is too small or the decoded size does not fit into the requested size
20    #[error("Output buffer is too small to hold decoded data")]
21    TooMuchOutputData,
22
23    /// Input has additional unread data left after decoding
24    #[error("Input has unread data left after filling the output buffer")]
25    TooMuchInputData,
26
27    /// An operation (like read or seek) on the underlying reader has failed
28    #[error(transparent)]
29    Io(#[from] io::Error),
30}
31
32impl From<Error> for io::Error {
33    fn from(value: Error) -> Self {
34        match value {
35            Error::Io(error) => error,
36            me => io::Error::other(me),
37        }
38    }
39}
40#[derive(Debug)]
41/// A packbits command, also called the flag-counter byte, specifying how the next few bytes in a stream will be treated
42pub enum Command {
43    /// Escape code, next byte will be emitted literally
44    Escape,
45    /// Copy the next `self.0` bytes verbatim to output
46    Literal(u8),
47    /// Repeat the next byte `self.0` times
48    Repeat(u16),
49}
50
51impl Command {
52    #[inline]
53    /// Produces a byte and returns the next decoder state ([`Operation`]) at the same time
54    pub fn execute<T: io::Read>(self, reader: &mut T) -> Result<(u8, Operation), OperationError> {
55        match self {
56            Command::Escape => {
57                let byte = Self::read_byte(reader)?;
58                Ok((byte, Operation::Literal(0)))
59            }
60            Command::Literal(count) => {
61                let literal = Self::read_byte(reader)?;
62                Ok((literal, Operation::Literal(count - 1)))
63            }
64            Command::Repeat(count) => {
65                let literal = Self::read_byte(reader)?;
66                Ok((literal, Operation::Repeat(literal, count - 1)))
67            }
68        }
69    }
70
71    #[inline]
72    fn read_byte<T: io::Read>(reader: &mut T) -> Result<u8, OperationError> {
73        let mut buf = [0u8];
74        match reader.read(&mut buf) {
75            Ok(0) => Err(OperationError::UnexpectedEof),
76            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
77                Err(OperationError::UnexpectedEof)
78            }
79            Err(e) => Err(e)?,
80            Ok(_) => Ok(buf[0]),
81        }
82    }
83}
84
85impl From<i8> for Command {
86    #[inline]
87    fn from(val: i8) -> Self {
88        match val {
89            -128 => Command::Escape,
90            n if n < 0 => Command::Repeat((1 - n) as u16),
91            n => Command::Literal((1 + n) as u8),
92        }
93    }
94}
95
96impl From<u8> for Command {
97    #[inline]
98    fn from(val: u8) -> Self {
99        match val {
100            n if n < 128 => Command::Literal(1 + n),
101            128 => Command::Escape,
102            n => Command::Repeat(257 - n as u16),
103        }
104    }
105}
106
107#[derive(Debug)]
108/// Describes an unpacking operation, that can be fed additional data to recieved an output
109/// byte and the next operation
110pub enum Operation {
111    /// We're in the middle of returning literal bytes
112    Literal(u8),
113    /// The unpacker will emit `.1` more repetitions of byte `.0`
114    Repeat(u8, u16),
115}
116
117#[derive(Debug, thiserror::Error)]
118/// Error for executing [`Operation`]s, on insufficient input it provides a [`Command`] to continue the
119/// operation when more data becomes available
120pub enum OperationError {
121    /// The input stream ended, before the operation could complete.
122    ///
123    /// If more data becomes available, the provided command can be used to continue unpacking
124    /// where we left off.
125    #[error("More input data is required to executed command")]
126    InsufficientInput(Command),
127
128    /// The input stream ended with no unfinished command, this could be a good place to stop
129    /// unpacking if enough input bytes have been read or enough output bytes provided.
130    #[error("More input data is required to continue")]
131    UnexpectedEof,
132
133    /// An operation like _read_ or _seek_ on the underlying underlying reader has failed.
134    #[error(transparent)]
135    Io(#[from] io::Error),
136}
137
138impl From<OperationError> for io::Error {
139    fn from(value: OperationError) -> Self {
140        match value {
141            OperationError::Io(error) => error,
142            other => io::Error::other(other),
143        }
144    }
145}
146
147impl Default for Operation {
148    fn default() -> Self {
149        Self::new()
150    }
151}
152
153impl Operation {
154    /// Create a new empty operation, advancing ([`Operation::advance`]) it will read an entire new operation from the
155    /// underlying reader.
156    pub fn new() -> Self {
157        Self::Literal(0)
158    }
159
160    #[inline]
161    fn read_byte<T: io::Read>(reader: &mut T) -> Result<u8, OperationError> {
162        let mut buf = [0u8];
163        match reader.read(&mut buf) {
164            Ok(0) => Err(OperationError::UnexpectedEof),
165            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
166                Err(OperationError::UnexpectedEof)
167            }
168            Err(e) => Err(e)?,
169            Ok(_) => Ok(buf[0]),
170        }
171    }
172
173    #[inline]
174    /// Produce a byte and return next state of the decoder at the same time
175    ///
176    /// If `self` was already completed, this function will try to fetch the next command from the underlying
177    /// reader. Otherwise it will just perform the repetition, or read the next literal.
178    pub fn advance<T: io::Read>(&self, reader: &mut T) -> Result<(u8, Self), OperationError> {
179        match self {
180            op if op.is_completed() => match Self::read_byte(reader)? {
181                128 => match Self::read_byte(reader) {
182                    Ok(byte) => Ok((byte, Operation::Literal(0))),
183                    Err(OperationError::UnexpectedEof) => {
184                        Err(OperationError::InsufficientInput(Command::Escape))
185                    }
186                    Err(e) => Err(e),
187                },
188                n if n < 128 => match Self::read_byte(reader) {
189                    Ok(byte) => Ok((byte, Operation::Literal(n))),
190                    Err(OperationError::UnexpectedEof) => {
191                        Err(OperationError::InsufficientInput(Command::Literal(n + 1)))
192                    }
193                    Err(e) => Err(e),
194                },
195                n => match Self::read_byte(reader) {
196                    Err(OperationError::UnexpectedEof) => Err(OperationError::InsufficientInput(
197                        Command::Repeat(256 - n as u16 + 1),
198                    )),
199                    Ok(byte) => Ok((byte, Operation::Repeat(byte, 256 - n as u16))),
200                    Err(e) => Err(e),
201                },
202            },
203            Operation::Repeat(value, count) => Ok((*value, Operation::Repeat(*value, count - 1))),
204            Operation::Literal(count) => match Self::read_byte(reader) {
205                Ok(byte) => Ok((byte, Self::Literal(count - 1))),
206                Err(OperationError::UnexpectedEof) => {
207                    Err(OperationError::InsufficientInput(Command::Literal(*count)))
208                }
209                Err(e) => Err(e),
210            },
211        }
212    }
213
214    #[inline]
215    /// Determines if the current operation has been advanced to completion.
216    pub fn is_completed(&self) -> bool {
217        matches!(self, Self::Literal(0) | Self::Repeat(_, 0))
218    }
219}
220
221#[cfg(test)]
222mod operation {
223    use crate::{Command, Operation, OperationError};
224    use std::io;
225
226    #[test]
227    fn read_literal() {
228        let mut reader = io::Cursor::new(b"\x01\xAB\xBC");
229        assert!(matches!(
230            Operation::default().advance(&mut reader),
231            Ok((0xab, Operation::Literal(1)))
232        ));
233    }
234
235    #[test]
236    fn read_repeat() {
237        let mut reader = io::Cursor::new(b"\xFF\xAB");
238        assert!(matches!(
239            Operation::default().advance(&mut reader),
240            Ok((0xab, Operation::Repeat(0xab, 1)))
241        ));
242    }
243
244    #[test]
245    fn read_escape() {
246        let mut reader = io::Cursor::new(b"\x80\xAB");
247        assert!(matches!(
248            Operation::default().advance(&mut reader),
249            Ok((0xAB, Operation::Repeat(_, 0) | Operation::Literal(0)))
250        ));
251    }
252
253    #[test]
254    fn read_partial_literal() {
255        let mut reader = io::Cursor::new(b"\x01");
256        assert!(matches!(
257            Operation::default().advance(&mut reader),
258            Err(OperationError::InsufficientInput(Command::Literal(2)))
259        ));
260    }
261
262    #[test]
263    fn read_partial_literal_to_completion() {
264        let mut reader = io::Cursor::new(b"\xAB");
265        assert!(matches!(
266            Command::Literal(2).execute(&mut reader),
267            Ok((0xab, Operation::Literal(1)))
268        ));
269    }
270
271    #[test]
272    fn read_partial_repeat() {
273        let mut reader = io::Cursor::new(b"\xFF");
274        assert!(matches!(
275            Operation::default().advance(&mut reader),
276            Err(OperationError::InsufficientInput(Command::Repeat(2)))
277        ));
278    }
279
280    #[test]
281    fn read_partial_repeat_to_completion() {
282        let mut reader = io::Cursor::new(b"\xAB");
283        assert!(matches!(
284            Command::Repeat(2).execute(&mut reader),
285            Ok((0xab, Operation::Repeat(0xab, 1)))
286        ));
287    }
288
289    #[test]
290    fn read_partial_escape() {
291        let mut reader = io::Cursor::new(b"\x80");
292        assert!(matches!(
293            Operation::default().advance(&mut reader),
294            Err(OperationError::InsufficientInput(Command::Escape))
295        ));
296    }
297
298    #[test]
299    fn read_partial_escape_to_completion() {
300        let mut reader = io::Cursor::new(b"\x80");
301        assert!(matches!(
302            Command::Escape.execute(&mut reader),
303            Ok((0x80, Operation::Literal(0) | Operation::Repeat(_, 0)))
304        ));
305    }
306}
307
308/// Unpack `input` into exactly `count` bytes
309pub fn unpack_exact(input: &[u8], count: usize) -> Result<Vec<u8>, Error> {
310    let mut output = Vec::with_capacity(count);
311    let mut remaining_output = count;
312    let mut i = 0;
313
314    loop {
315        if remaining_output == 0 {
316            return if i == input.len() {
317                Ok(output)
318            } else {
319                Err(Error::TooMuchInputData)
320            };
321        }
322
323        if i == input.len() {
324            return Err(Error::NotEnoughInputData);
325        }
326
327        i = match input[i].into() {
328            Command::Escape => i + 1,
329            Command::Literal(count) => {
330                if input.len() < i + count as usize {
331                    return Err(Error::NotEnoughInputData);
332                }
333
334                if count as usize > remaining_output {
335                    return Err(Error::TooMuchOutputData);
336                }
337
338                output.extend(&input[i + 1..(i + 1 + (count as usize))]);
339                remaining_output -= count as usize;
340                i + count as usize + 1
341            }
342            Command::Repeat(count) => {
343                if input.len() < i + 1 {
344                    return Err(Error::NotEnoughInputData);
345                }
346
347                if count as usize > remaining_output {
348                    return Err(Error::TooMuchOutputData);
349                }
350
351                output.extend(iter::repeat_n(input[i + 1], count as usize));
352                remaining_output -= count as usize;
353                i + 2
354            }
355        }
356    }
357}
358
359/// Unpack all data from `buffer` into a newly allocated vector
360pub fn unpack_buf(buffer: &[u8]) -> io::Result<Vec<u8>> {
361    unpack(io::Cursor::new(buffer))
362}
363
364/// Unpack all data from `reader` into a newly allocated vector
365pub fn unpack<R: io::Read>(mut reader: R) -> io::Result<Vec<u8>> {
366    let mut output = Vec::new();
367    let mut buf = [0u8];
368    loop {
369        let command = match reader.read_exact(&mut buf) {
370            Ok(()) => buf[0].into(),
371            Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(output),
372            Err(e) => return Err(e)?,
373        };
374
375        match command {
376            Command::Escape => {
377                reader.read_exact(&mut buf)?;
378                output.push(buf[0]);
379            }
380            Command::Literal(count) => {
381                let mut buffer = vec![0u8; count as usize];
382                reader.read_exact(&mut buffer)?;
383                output.append(&mut buffer);
384            }
385            Command::Repeat(count) => {
386                reader.read_exact(&mut buf)?;
387                output.reserve(count as usize);
388                for _ in 0..count {
389                    output.push(buf[0]);
390                }
391            }
392        }
393    }
394}
395
396/// Extension for [`std::io::Read`]ers to unpack _PackBits_ data
397pub trait PackBitsReaderExt {
398    /// Unpack PackBit data into the buffer, returning the number of bytes written to the buffer
399    fn read_packbits(&mut self, target: &mut [u8]) -> io::Result<usize>;
400}
401
402impl<T: io::Read> PackBitsReaderExt for T {
403    fn read_packbits(&mut self, target: &mut [u8]) -> io::Result<usize> {
404        let mut op = Operation::default();
405        for (idx, byte) in target.iter_mut().enumerate() {
406            match op.advance(self) {
407                Err(OperationError::InsufficientInput(_)) => {
408                    // TODO: We've read a command that can't be completed, this should be
409                    // communicated to users
410                    return Ok(idx);
411                }
412                Err(OperationError::UnexpectedEof) => return Ok(idx),
413                Err(other) => return Err(other)?,
414                Ok((value, next)) => {
415                    *byte = value;
416                    op = next;
417                }
418            }
419        }
420
421        Ok(target.len())
422    }
423}
424
425#[cfg(test)]
426mod test {
427    use crate::{unpack_buf, Error};
428
429    use super::{unpack, unpack_exact};
430
431    #[test]
432    fn canonical_example() {
433        let input = b"\xFE\xAA\x02\x80\x00\x2A\xFD\xAA\x03\x80\x00\x2A\x22\xF7\xAA";
434        let unpacked = b"\xAA\xAA\xAA\x80\x00\x2A\xAA\xAA\xAA\xAA\x80\x00\x2A\x22\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA";
435
436        assert_eq!(unpack(input.as_slice()).unwrap(), unpacked);
437    }
438
439    #[test]
440    fn test_simple_buffer_expansion() {
441        let input = b"\xFE\xAA\x02\x80\x00\x2A\xFD\xAA\x03\x80\x00\x2A\x22\xF7\xAA";
442        let expectation = b"\xAA\xAA\xAA\x80\x00\x2A\xAA\xAA\xAA\xAA\x80\x00\x2A\x22\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA";
443        let result = unpack_buf(input).unwrap();
444        assert_eq!(expectation.to_vec(), result);
445
446        let result = unpack_exact(input, 24);
447        assert_eq!(expectation.to_vec(), result.unwrap());
448    }
449
450    //#[test]
451    //fn test_clean_maximum_output() {
452    //let input = b"\xFE\xAA\x02\x80\x00\x2A\xFD\xAA\x03\x80\x00\x2A\x22\xF7\xAA\xF7\xAA";
453    //let expectation = b"\xAA\xAA\xAA\x80\x00\x2A\xAA\xAA\xAA\xAA\x80\x00\x2A\x22\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA";
454
455    //let result = unpack_until(input, 24);
456    //assert_eq!((expectation.to_vec(), 15), result.unwrap());
457
458    //let input = b"";
459    //let result = unpack_until(input, 0);
460    //assert_eq!((Vec::<u8>::new(), 0), result.unwrap());
461    //}
462
463    #[test]
464    fn test_too_much_output() {
465        let input = b"\xFE\xAA\x02\x80\x00\x2A\xFD\xAA\x03\x80\x00\x2A\x22\xF7\xAA";
466
467        let result = unpack_exact(input, 20);
468        assert!(result.is_err_and(|e| matches!(e, Error::TooMuchOutputData)));
469    }
470
471    #[test]
472    fn test_not_enough_input() {
473        let input = b"\xFE\xAA\x02\x80\x00\x2A\xFD\xAA\x03\x80\x00\x2A\x22\xF7\xAA";
474
475        let result = unpack_exact(input, 26);
476        assert!(result.is_err_and(|e| matches!(e, Error::NotEnoughInputData)));
477
478        let input = b"";
479        let result = unpack_exact(input, 1);
480        assert!(result.is_err_and(|e| matches!(e, Error::NotEnoughInputData)));
481    }
482
483    mod operation {
484        use crate::{Command, Operation, OperationError};
485        use std::io;
486
487        #[test]
488        fn read_literal() {
489            let mut reader = io::Cursor::new(b"\x01\xAB\xBC");
490            assert!(matches!(
491                Operation::default().advance(&mut reader),
492                Ok((0xab, Operation::Literal(1)))
493            ));
494        }
495
496        #[test]
497        fn read_repeat() {
498            let mut reader = io::Cursor::new(b"\xFF\xAB");
499            assert!(matches!(
500                Operation::default().advance(&mut reader),
501                Ok((0xab, Operation::Repeat(0xab, 1)))
502            ));
503        }
504
505        #[test]
506        fn read_escape() {
507            let mut reader = io::Cursor::new(b"\x80\xAB");
508            assert!(matches!(
509                Operation::default().advance(&mut reader),
510                Ok((0xAB, Operation::Repeat(_, 0) | Operation::Literal(0)))
511            ));
512        }
513
514        #[test]
515        fn read_partial_literal() {
516            let mut reader = io::Cursor::new(b"\x01");
517            assert!(matches!(
518                Operation::default().advance(&mut reader),
519                Err(OperationError::InsufficientInput(Command::Literal(2)))
520            ));
521        }
522
523        #[test]
524        fn read_partial_literal_to_completion() {
525            let mut reader = io::Cursor::new(b"\xAB");
526            assert!(matches!(
527                Command::Literal(2).execute(&mut reader),
528                Ok((0xab, Operation::Literal(1)))
529            ));
530        }
531
532        #[test]
533        fn read_partial_repeat() {
534            let mut reader = io::Cursor::new(b"\xFF");
535            assert!(matches!(
536                Operation::default().advance(&mut reader),
537                Err(OperationError::InsufficientInput(Command::Repeat(2)))
538            ));
539        }
540
541        #[test]
542        fn read_partial_repeat_to_completion() {
543            let mut reader = io::Cursor::new(b"\xAB");
544            assert!(matches!(
545                Command::Repeat(2).execute(&mut reader),
546                Ok((0xab, Operation::Repeat(0xab, 1)))
547            ));
548        }
549
550        #[test]
551        fn read_partial_escape() {
552            let mut reader = io::Cursor::new(b"\x80");
553            assert!(matches!(
554                Operation::default().advance(&mut reader),
555                Err(OperationError::InsufficientInput(Command::Escape))
556            ));
557        }
558
559        #[test]
560        fn read_partial_escape_to_completion() {
561            let mut reader = io::Cursor::new(b"\x80");
562            assert!(matches!(
563                Command::Escape.execute(&mut reader),
564                Ok((0x80, Operation::Literal(0) | Operation::Repeat(_, 0)))
565            ));
566        }
567    }
568}