chisel_decoders/
lib.rs

1//! ## Overview
2//!
//! This crate contains very simple, lean implementations of decoders that consume `u8` bytes from a given
//! `Read` implementation and decode them into Rust's built-in `char` type using either UTF-8 or ASCII.
5//!
6//! The decoder implementations are pretty fast and loose: under the covers they utilise some bit-twiddlin' in
7//! conjunction with the *unsafe* `transmute` function to do the conversions.
8//!
9//! *No string allocations are used during conversion*.
10//!
11//! ### Usage
12//!
13//! Usage is very simple, provided you have something that implements `Read` in order to source some bytes:
14//!
15//! ### Create from a slice
16//!
//! Just wrap your array in a `mut` reader, and then plug it into a new instance of `Utf8Decoder`:
18//!
19//! ```rust
20//!     # use std::io::BufReader;
21//!     # use chisel_decoders::utf8::Utf8Decoder;
22//!
23//!     let buffer: &[u8] = &[0x10, 0x12, 0x23, 0x12];
24//!     let mut reader = BufReader::new(buffer);
25//!     let _decoder = Utf8Decoder::new(&mut reader);
26//! ```
27//! If you're fairly certain that you're dealing with ASCII only, then just pick the `AsciiDecoder` instead:
28//!
29//! ```rust
30//!     # use std::io::BufReader;
31//!     # use chisel_decoders::ascii::AsciiDecoder;
32//!
33//!     let buffer: &[u8] = &[0x10, 0x12, 0x23, 0x12];
34//!     let mut reader = BufReader::new(buffer);
35//!     let _decoder = AsciiDecoder::new(&mut reader);
36//! ```
37//!
38//! ### Create from a file
39//!
40//! Just crack open your file, wrap in a `Read` instance and then plug into a new instance of `Utf8Decoder`:
41//!
42//! ```rust
43//!     # use std::fs::File;
44//!     # use std::io::BufReader;
45//!     # use std::path::PathBuf;
46//!     # use chisel_decoders::utf8::Utf8Decoder;
47//!
48//!     let path = PathBuf::from("./Cargo.toml");
49//!     let f = File::open(path);
50//!     let mut reader = BufReader::new(f.unwrap());
51//!     let _decoder = Utf8Decoder::new(&mut reader);
52//! ```
53//! ### Consuming Decoded `chars`
54//!
55//! Once you've created an instance of a specific decoder, you simply iterate over the `chars` in
56//! order to pull out the decoded characters (a decoder implements `Iterator<Item=char>`):
57//!
58//! ```rust
59//!     # use std::fs::File;
60//!     # use std::io::BufReader;
61//!     # use std::path::PathBuf;
62//!     # use chisel_decoders::utf8::Utf8Decoder;
63//!
64//!     let path = PathBuf::from("./Cargo.toml");
65//!     let f = File::open(path);
66//!     let mut reader = BufReader::new(f.unwrap());
67//!     let decoder = Utf8Decoder::new(&mut reader);
68//!     for c in decoder {
69//!        println!("char: {}", c)
70//!     }
71//! ```
72//!
73use std::io::BufRead;
74
75use crate::ascii::AsciiDecoder;
76use crate::utf8::Utf8Decoder;
77
78pub mod ascii;
79pub mod common;
80pub mod utf8;
81
/// Enumeration of the different supported encoding types.
///
/// This is a plain, field-less tag used to select which decoder gets constructed. Besides being
/// `Copy`, it can be printed with `{:?}`, compared for equality and used as a hash key.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// Decode the input as UTF-8
    Utf8,
    /// Decode the input as ASCII
    Ascii,
}
88
89/// Default encoding is UTF-8
90impl Default for Encoding {
91    fn default() -> Self {
92        Self::Utf8
93    }
94}
95
96/// Helper function for constructing a default decoder, wrapped around an input buffer
97pub fn default_decoder<'a, Buffer: BufRead>(
98    buffer: &'a mut Buffer,
99) -> Box<dyn Iterator<Item = char> + 'a> {
100    Box::new(Utf8Decoder::new(buffer))
101}
102
103/// Helper function for constructing a specific decoder, wrapped around an input buffer
104pub fn new_decoder<'a, Buffer: BufRead>(
105    buffer: &'a mut Buffer,
106    encoding: Encoding,
107) -> Box<dyn Iterator<Item = char> + 'a> {
108    match encoding {
109        Encoding::Ascii => Box::new(AsciiDecoder::new(buffer)),
110        Encoding::Utf8 => Box::new(Utf8Decoder::new(buffer)),
111    }
112}
113
#[cfg(test)]
mod lib {
    use std::fs::File;
    use std::io::BufReader;

    use crate::{default_decoder, new_decoder, Encoding};

    /// Opens the fuzz fixture and wraps it in a buffered reader.
    ///
    /// The path is relative, so it is resolved against the current working directory; the
    /// test panics if the file cannot be opened.
    fn fuzz_reader() -> BufReader<File> {
        let fixture = File::open("fixtures/fuzz.txt").unwrap();
        BufReader::new(fixture)
    }

    /// The default decoder yields at least one character from the fixture.
    #[test]
    fn should_create_a_default_decoder() {
        let mut input = fuzz_reader();
        let decoder = default_decoder(&mut input);
        assert!(decoder.count() > 0);
    }

    /// An explicitly requested ASCII decoder yields at least one character from the fixture.
    #[test]
    fn should_create_a_new_ascii_decoder() {
        let mut input = fuzz_reader();
        let decoder = new_decoder(&mut input, Encoding::Ascii);
        assert!(decoder.count() > 0);
    }

    /// An explicitly requested UTF-8 decoder yields at least one character from the fixture.
    #[test]
    fn should_create_a_new_utf8_decoder() {
        let mut input = fuzz_reader();
        let decoder = new_decoder(&mut input, Encoding::Utf8);
        assert!(decoder.count() > 0);
    }
}