chisel_decoders/
ascii.rs

1#![allow(dead_code)]
2#![allow(clippy::transmute_int_to_char)]
//! A character-oriented decoder implementation that takes an underlying [`u8`] (byte) source
//! and produces a stream of decoded ASCII characters.
5use std::io::BufRead;
6use std::mem::transmute;
7
8use crate::common::*;
9use crate::decoder_error;
10
/// Decoder producing ASCII characters from a mutably borrowed [`BufRead`] source.
pub struct AsciiDecoder<'a, B>
where
    B: BufRead,
{
    /// Mutable borrow of the underlying byte source
    input: &'a mut B,
    /// Bytes pulled from `input`, staged here until they are decoded
    buffer: Vec<u8>,
    /// Set once `input` has been read into `buffer`
    init: bool,
    /// Position within `buffer` of the next byte to decode
    index: usize,
}
22
23impl<'a, Buffer: BufRead> AsciiDecoder<'a, Buffer> {
24    /// Create a new decoder with a default buffer size
25    pub fn new(r: &'a mut Buffer) -> Self {
26        AsciiDecoder {
27            input: r,
28            buffer: vec![],
29            init: false,
30            index: 0,
31        }
32    }
33
34    /// Initialise and read the input into an internal buffer for decoding
35    fn init(&mut self) -> DecoderResult<()> {
36        match self.input.read_to_end(&mut self.buffer) {
37            Ok(_) => {
38                self.init = true;
39                Ok(())
40            }
41            Err(_) => Err(decoder_error!(
42                DecoderErrorCode::StreamFailure,
43                "failed to read input"
44            )),
45        }
46    }
47
48    /// Attempt to decode the next character in the underlying stream.
49    fn decode_next(&mut self) -> DecoderResult<char> {
50        if !self.init {
51            self.init()?;
52        }
53
54        if self.index >= self.buffer.len() {
55            return Err(decoder_error!(
56                DecoderErrorCode::EndOfInput,
57                "end of input reached"
58            ));
59        }
60
61        if self.buffer[self.index] >> 7 == 0 {
62            return unsafe {
63                self.index += 1;
64                Ok(transmute(self.buffer[self.index - 1] as u32))
65            };
66        } else {
67            return Err(decoder_error!(
68                DecoderErrorCode::OutOfRange,
69                "non-ascii character detected"
70            ));
71        }
72    }
73}
74
75impl<'a, B: BufRead> Iterator for AsciiDecoder<'a, B> {
76    type Item = char;
77    /// Decode the next character from the underlying stream
78    fn next(&mut self) -> Option<Self::Item> {
79        match self.decode_next() {
80            Ok(c) => Some(c),
81            Err(_) => None,
82        }
83    }
84}
85
#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::BufReader;
    use std::time::Instant;

    use crate::ascii::AsciiDecoder;
    use crate::common::DecoderErrorCode;

    /// Open the fuzz fixture used to provoke an out-of-range error.
    fn utf8_fuzz_file() -> File {
        File::open("fixtures/fuzz.txt").unwrap()
    }
    /// Open the ASCII benchmark fixture.
    fn ascii_fuzz_file() -> File {
        File::open("fixtures/json/bench/ascii/asciiart.json").unwrap()
    }
    /// Open the UTF-8 twitter benchmark fixture.
    fn complex_file() -> File {
        File::open("fixtures/json/bench/utf8/twitter.json").unwrap()
    }

    /// Decoding an in-memory byte slice yields one character per input byte.
    #[test]
    fn can_create_from_array() {
        let buffer: &[u8] = &[0x10, 0x12, 0x23, 0x12];
        let mut reader = BufReader::new(buffer);
        let mut decoder = AsciiDecoder::new(&mut reader);
        let mut count = 0;
        while decoder.decode_next().is_ok() {
            count += 1;
        }
        // All four input bytes are below 0x80, so each of them must decode
        assert_eq!(count, 4);
    }

    /// A decoder can be constructed over a file-backed reader.
    #[test]
    fn can_create_from_file() {
        let mut reader = BufReader::new(utf8_fuzz_file());
        let _decoder = AsciiDecoder::new(&mut reader);
    }

    /// The first error hit while decoding the fuzz fixture must be an out-of-range error.
    #[test]
    fn should_out_of_range_utf8() {
        let mut reader = BufReader::new(utf8_fuzz_file());
        let mut decoder = AsciiDecoder::new(&mut reader);
        loop {
            match decoder.decode_next() {
                Ok(_) => (),
                Err(e) => {
                    assert_eq!(e.code, DecoderErrorCode::OutOfRange);
                    break;
                }
            }
        }
    }

    /// Decoding the ASCII fixture produces the expected number of characters.
    #[test]
    fn should_pass_an_ascii_fuzz_test() {
        let mut reader = BufReader::new(ascii_fuzz_file());
        let mut decoder = AsciiDecoder::new(&mut reader);
        let mut count = 0;
        while decoder.decode_next().is_ok() {
            count += 1;
        }
        assert_eq!(count, 6406307);
    }

    /// The decoder can be consumed through the `Iterator` interface.
    #[test]
    fn should_be_an_iterator() {
        let start = Instant::now();
        let mut reader = BufReader::new(ascii_fuzz_file());
        let decoder = AsciiDecoder::new(&mut reader);
        assert_eq!(decoder.count(), 6406307);
        println!("Counted fuzz file in {:?}", start.elapsed());
    }
}