chisel_decoders/lib.rs
1//! ## Overview
2//!
//! This crate contains very simple, lean implementations of decoders that consume `u8` bytes from a given
//! `Read` implementation, and decode them into the Rust internal `char` type using either UTF-8 or ASCII.
5//!
6//! The decoder implementations are pretty fast and loose: under the covers they utilise some bit-twiddlin' in
7//! conjunction with the *unsafe* `transmute` function to do the conversions.
8//!
9//! *No string allocations are used during conversion*.
10//!
11//! ### Usage
12//!
13//! Usage is very simple, provided you have something that implements `Read` in order to source some bytes:
14//!
15//! ### Create from a slice
16//!
//! Just wrap your array in a `mut` reader, and then plug it into a new instance of `Utf8Decoder`:
18//!
19//! ```rust
20//! # use std::io::BufReader;
21//! # use chisel_decoders::utf8::Utf8Decoder;
22//!
23//! let buffer: &[u8] = &[0x10, 0x12, 0x23, 0x12];
24//! let mut reader = BufReader::new(buffer);
25//! let _decoder = Utf8Decoder::new(&mut reader);
26//! ```
27//! If you're fairly certain that you're dealing with ASCII only, then just pick the `AsciiDecoder` instead:
28//!
29//! ```rust
30//! # use std::io::BufReader;
31//! # use chisel_decoders::ascii::AsciiDecoder;
32//!
33//! let buffer: &[u8] = &[0x10, 0x12, 0x23, 0x12];
34//! let mut reader = BufReader::new(buffer);
35//! let _decoder = AsciiDecoder::new(&mut reader);
36//! ```
37//!
38//! ### Create from a file
39//!
//! Just crack open your file, wrap it in a `Read` instance and then plug it into a new instance of `Utf8Decoder`:
41//!
42//! ```rust
43//! # use std::fs::File;
44//! # use std::io::BufReader;
45//! # use std::path::PathBuf;
46//! # use chisel_decoders::utf8::Utf8Decoder;
47//!
48//! let path = PathBuf::from("./Cargo.toml");
49//! let f = File::open(path);
50//! let mut reader = BufReader::new(f.unwrap());
51//! let _decoder = Utf8Decoder::new(&mut reader);
52//! ```
53//! ### Consuming Decoded `chars`
54//!
55//! Once you've created an instance of a specific decoder, you simply iterate over the `chars` in
56//! order to pull out the decoded characters (a decoder implements `Iterator<Item=char>`):
57//!
58//! ```rust
59//! # use std::fs::File;
60//! # use std::io::BufReader;
61//! # use std::path::PathBuf;
62//! # use chisel_decoders::utf8::Utf8Decoder;
63//!
64//! let path = PathBuf::from("./Cargo.toml");
65//! let f = File::open(path);
66//! let mut reader = BufReader::new(f.unwrap());
67//! let decoder = Utf8Decoder::new(&mut reader);
68//! for c in decoder {
69//! println!("char: {}", c)
70//! }
71//! ```
72//!
73use std::io::BufRead;
74
75use crate::ascii::AsciiDecoder;
76use crate::utf8::Utf8Decoder;
77
78pub mod ascii;
79pub mod common;
80pub mod utf8;
81
/// Enumeration of the different supported encoding types.
///
/// The enum is a plain, field-less selector, so it derives the common value
/// traits: it can be copied, compared for equality, hashed (e.g. used as a map
/// key) and printed with `{:?}` for diagnostics.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// Decode the input as UTF-8.
    Utf8,
    /// Decode the input as ASCII.
    Ascii,
}
88
89/// Default encoding is UTF-8
90impl Default for Encoding {
91 fn default() -> Self {
92 Self::Utf8
93 }
94}
95
96/// Helper function for constructing a default decoder, wrapped around an input buffer
97pub fn default_decoder<'a, Buffer: BufRead>(
98 buffer: &'a mut Buffer,
99) -> Box<dyn Iterator<Item = char> + 'a> {
100 Box::new(Utf8Decoder::new(buffer))
101}
102
103/// Helper function for constructing a specific decoder, wrapped around an input buffer
104pub fn new_decoder<'a, Buffer: BufRead>(
105 buffer: &'a mut Buffer,
106 encoding: Encoding,
107) -> Box<dyn Iterator<Item = char> + 'a> {
108 match encoding {
109 Encoding::Ascii => Box::new(AsciiDecoder::new(buffer)),
110 Encoding::Utf8 => Box::new(Utf8Decoder::new(buffer)),
111 }
112}
113
#[cfg(test)]
mod lib {
    use std::fs::File;
    use std::io::BufReader;

    use crate::{default_decoder, new_decoder, Encoding};

    /// Opens the shared fuzz fixture and wraps it in a buffered reader.
    fn fuzz_reader() -> BufReader<File> {
        let fixture = File::open("fixtures/fuzz.txt").unwrap();
        BufReader::new(fixture)
    }

    /// The default decoder must yield at least one character from the fixture.
    #[test]
    fn should_create_a_default_decoder() {
        let mut input = fuzz_reader();
        let decoded = default_decoder(&mut input).count();
        assert!(decoded > 0);
    }

    /// An explicitly requested ASCII decoder must yield at least one character.
    #[test]
    fn should_create_a_new_ascii_decoder() {
        let mut input = fuzz_reader();
        let decoded = new_decoder(&mut input, Encoding::Ascii).count();
        assert!(decoded > 0);
    }

    /// An explicitly requested UTF-8 decoder must yield at least one character.
    #[test]
    fn should_create_a_new_utf8_decoder() {
        let mut input = fuzz_reader();
        let decoded = new_decoder(&mut input, Encoding::Utf8).count();
        assert!(decoded > 0);
    }
}