// string_inspector/decoding.rs
extern crate encoding;

use std::borrow::Cow;
use std::fmt::Write;

use colored::*;
use encoding::types::EncodingRef;
use encoding::{DecoderTrap, EncoderTrap, Encoding};

/// Number of display columns taken by one encoded byte: two hex digits
/// followed by a separating space (see the `"{:02x} "` byte format).
const BYTE_DISPLAY_SIZE: u16 = 3;

/// A single character paired with the bytes that represent it.
///
/// `PartialEq`/`Eq` are derived so values can be compared directly (for
/// example in assertions); both fields already support equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedCharacter {
    /// The character itself.
    pub character: char,
    /// The bytes associated with `character`; their count determines how many
    /// display columns the character is given.
    pub bytes: Vec<u8>,
}

19impl DecodedCharacter {
20 fn width(&self) -> usize {
22 self.bytes.len() * BYTE_DISPLAY_SIZE as usize
23 }
24
25 fn new(character: char, encoding: &dyn Encoding) -> DecodedCharacter {
39 let bytes_for_character = encoding.encode(&character.to_string(), EncoderTrap::Replace).unwrap();
40 DecodedCharacter { character, bytes: bytes_for_character }
41 }
42
43 fn format_character(&self) -> String {
52 let char_size = self.width();
53 let character = self.character;
54
55 match character {
56 '\t' | '\r' | '\n' => {
57 let escaped = character.escape_default();
58 format!("{:width$} ", escaped, width = char_size)
59 }
60 '\u{20}'...'\u{7e}' => {
61 format!("{:width$}", character, width = char_size)
62 }
63 _ => {
64 let codepoint = format!("{:02x} ", character as u32);
65 format!("{:width$}", codepoint, width = char_size)
66 }
67 }
68 }
69
70 fn format_bytes(&self) -> String {
72 let mut buffer = String::new();
73 for byte in self.bytes.iter() {
74 let byte_hex = format!("{:02x} ", byte);
75 buffer.push_str(&byte_hex)
76 }
77 buffer
78 }
79}
80
81pub struct DecodedString {
83 pub encoding: &'static dyn Encoding,
84 pub characters: Vec<DecodedCharacter>
85}
86
87impl DecodedString {
88 pub fn decode(string: &[u8], encoding: EncodingRef) -> Result<DecodedString, Cow<'static, str>> {
95 match encoding.decode(string, DecoderTrap::Replace) {
96 Ok(result) => {
97 let characters = result.chars().map(|c| DecodedCharacter::new(c, encoding)).collect();
98 Ok(DecodedString {
99 encoding: encoding,
100 characters: characters
101 })
102 },
103 Err(msg) => Err(msg)
104 }
105 }
106
107 pub fn format_bytes(&self) -> String {
109 self.toggle_color(self.characters.iter().map(DecodedCharacter::format_bytes))
110 }
111
112 pub fn format_characters(&self) -> String {
121 self.toggle_color(self.characters.iter().map(DecodedCharacter::format_character))
122 }
123
124 fn toggle_color<I>(&self, iterator: I) -> String
125 where I: Iterator<Item = String>
126 {
127 let mut color_toggle = true;
128 let mut buffer = String::new();
129
130 for string in iterator {
131 if color_toggle {
132 buffer.push_str(&string.green().to_string());
133 } else {
134 buffer.push_str(&string.blue().to_string());
135 }
136 color_toggle = !color_toggle;
137 }
138 buffer
139 }
140
141 pub fn to_string(&self) -> String {
143 self.characters.iter().map(|c| c.character).collect()
144 }
145
146 pub fn wrap_lines(&self, max_line_width: usize) -> Vec<DecodedString> {
149 let mut lines = Vec::new();
150 let mut characters_in_line = Vec::new();
151 let mut line_size = 0;
152
153 for character in self.characters.iter() {
154 let char_output_width = character.width();
155 if line_size + char_output_width > max_line_width as usize {
156 lines.push(DecodedString {characters: characters_in_line, encoding: self.encoding});
157 characters_in_line = Vec::new();
158 line_size = 0;
159 }
160
161 characters_in_line.push(character.clone());
162 line_size += character.width();
163 }
164
165 if characters_in_line.len() > 0 {
166 lines.push(DecodedString {characters: characters_in_line, encoding: self.encoding});
167 }
168
169 lines
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use encoding::all::UTF_8;

    /// Decodes `text` as UTF-8 with colour output disabled, so the formatted
    /// rows can be compared against plain string literals.
    fn decode_plain(text: &str) -> DecodedString {
        colored::control::set_override(false);
        DecodedString::decode(text.as_bytes(), UTF_8).unwrap()
    }

    // Each encoded byte occupies three columns, so a one-byte character is
    // padded to three columns and a two-byte character to six.

    #[test]
    fn ascii_printables() {
        let decoding = decode_plain("!aA1");
        assert_eq!(decoding.format_bytes(), "21 61 41 31 ");
        assert_eq!(decoding.format_characters(), "!  a  A  1  ");
    }

    #[test]
    fn ascii_escapables() {
        let decoding = decode_plain("\n\r\t");
        assert_eq!(decoding.format_bytes(), "0a 0d 09 ");
        assert_eq!(decoding.format_characters(), "\\n \\r \\t ");
    }

    #[test]
    fn ascii_non_printables() {
        let decoding = decode_plain("\u{00}\u{7f}");
        assert_eq!(decoding.format_bytes(), "00 7f ");
        assert_eq!(decoding.format_characters(), "00 7f ");
    }

    #[test]
    fn extra_latin_letters() {
        let decoding = decode_plain("éß");
        assert_eq!(decoding.format_bytes(), "c3 a9 c3 9f ");
        assert_eq!(decoding.format_characters(), "e9    df    ");
    }

    #[test]
    fn display_width_single_byte() {
        let decoded_character = DecodedCharacter { character: 'a', bytes: "a".as_bytes().to_owned() };
        assert_eq!(decoded_character.width(), 3);
    }

    #[test]
    fn display_width_two_bytes() {
        let decoded_character = DecodedCharacter { character: 'ß', bytes: "ß".as_bytes().to_owned() };
        assert_eq!(decoded_character.width(), 6);
    }

    #[test]
    fn line_wrapping_if_it_fits() {
        let screen_width = 15;
        let decoding = decode_plain("aaaaa");
        assert_eq!(decoding.format_bytes(), "61 61 61 61 61 ");
        assert_eq!(decoding.format_characters(), "a  a  a  a  a  ");

        let lines = decoding.wrap_lines(screen_width);
        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0].format_bytes(), "61 61 61 61 61 ");
        assert_eq!(lines[0].format_characters(), "a  a  a  a  a  ");
    }

    #[test]
    fn line_wrapping_wraps_to_exact_number_of_lines() {
        let screen_width = 15;
        let decoding = decode_plain("aaaaabbbbb");
        let lines = decoding.wrap_lines(screen_width);

        assert_eq!(lines.len(), 2);

        assert_eq!(lines[0].format_bytes(), "61 61 61 61 61 ");
        assert_eq!(lines[0].format_characters(), "a  a  a  a  a  ");

        assert_eq!(lines[1].format_bytes(), "62 62 62 62 62 ");
        assert_eq!(lines[1].format_characters(), "b  b  b  b  b  ");
    }

    #[test]
    fn line_wrapping_wraps_to_inexact_number_of_lines() {
        let screen_width = 15;
        let decoding = decode_plain("aaaaabbbbbcc");
        let lines = decoding.wrap_lines(screen_width);

        assert_eq!(lines.len(), 3);

        assert_eq!(lines[0].format_bytes(), "61 61 61 61 61 ");
        assert_eq!(lines[0].format_characters(), "a  a  a  a  a  ");

        assert_eq!(lines[1].format_bytes(), "62 62 62 62 62 ");
        assert_eq!(lines[1].format_characters(), "b  b  b  b  b  ");

        assert_eq!(lines[2].format_bytes(), "63 63 ");
        assert_eq!(lines[2].format_characters(), "c  c  ");
    }
}