advent_ocr/
lib.rs

1//! A function to convert ASCII-art representations of letters generated by Advent of Code 
2//! puzzles into a String containing those letters.
3
4use std::collections::HashMap;
5pub mod scannable;
6pub use crate::scannable::Scannable;
7
8const FONT6: &str = include_str!("../res/font6.txt");
9const FONT10: &str = include_str!("../res/font10.txt");
10
11/// Takes an image containing Advent of Code's ASCII-art letter representations and converts 
12/// it to a standard String.
13/// 
14/// `image` is a `Scannable,` which is a marker trait that is implemented for the following
15/// types:
16/// * `&str`
17/// * `(&Vec<bool>, usize)`, a tuple consisting of a Vec of bools and the width of a line.
18/// * `(&Vec<bool>, usize)`, a tuple consisting of a Vec of chars and the width of a line.
19/// * `&Vec<Vec<bool>>`, a Vec of a Vec of bools.
20/// * `&Vec<Vec<char>>`, a Vec of a Vec of chars.
21/// 
22/// For &str and the char-based Vecs, '#' is considered part of a letter and all other chars 
23/// are considered blank space. For the bool-based Vecs, `true` is considered part of a 
24/// letter and `false` is considered blank space.
25/// 
26/// # Example
27/// 
28/// ```
29/// use advent_ocr::ocr;
30/// 
31/// let image = r"
32/// .##..###...##.
33/// ##..#.#..#.#..#
34/// ##..#.###..#...
35/// #####.#..#.#...
36/// ##..#.#..#.#..#
37/// ##..#.###...##.
38///     ";
39/// 
40/// let s = ocr(image).unwrap();
41/// assert_eq!(s, "ABC");
42/// ```
43pub fn ocr<T: Scannable>(image: T) -> Option<String> {
44    let image = image
45        .normalize()
46        .replace("\r\n", "\n");
47    let image = image.trim();
48    let ids = map_to_id(image)?;
49    let letter_map = get_letter_map();
50    let ocr = ids.iter()
51        .map(|id| letter_map.get(id).unwrap_or(&'?'))
52        .collect();
53    Some(ocr)
54}
55
/// Splits an ASCII-art image into letters and encodes each letter as a bitset id.
///
/// `image` must consist of newline-separated rows of equal byte length and be exactly
/// 6 or 10 rows tall, with no leading or trailing blank lines. The bytes `#` and `*`
/// count as part of a letter; every other byte is blank.
///
/// Columns are scanned left to right. Each run of non-blank columns forms one letter;
/// its id is built by shifting in one bit per cell, column by column, top row first.
/// A fully blank column ends the current letter.
///
/// Returns `None` if the image is empty, is not 6 or 10 rows tall, or is not
/// rectangular. Otherwise returns one id per letter in left-to-right order.
///
/// Note: a run wider than 6 columns in the 10-row font needs more than 64 bits, so the
/// oldest bits of such a run are shifted out of the id.
fn map_to_id(image: &str) -> Option<Vec<u64>> {
    let rows: Vec<&[u8]> = image.split('\n').map(str::as_bytes).collect();

    // Only the two known font heights are supported.
    let height = rows.len();
    if height != 6 && height != 10 {
        return None;
    }

    // Every row must be as wide as the first one (and not empty), which also makes the
    // `row[x]` indexing below safe.
    let width = rows[0].len();
    if width == 0 || rows.iter().any(|row| row.len() != width) {
        return None;
    }

    let mut ids = Vec::new();

    // Bitset of the letter currently being read; flushed to `ids` when the letter ends.
    let mut id = 0u64;

    // All letters have a blank column between them, except in one notable instance: a
    // font6 "Y" can be followed by another letter with no blank column in between,
    // presumably a bug in the puzzle output. Handling that requires tracking the width
    // of the current letter.
    let mut letter_width = 0usize;

    for x in 0..width {
        // Pack this column into `height` bits, top row in the most significant bit.
        let column = rows.iter().fold(0u64, |bits, row| {
            (bits << 1) | u64::from(matches!(row[x], b'#' | b'*'))
        });

        if column == 0 {
            // Blank column: the current letter (if any) is complete.
            if id != 0 {
                ids.push(id);
            }
            id = 0;
            letter_width = 0;
            continue;
        }

        // "Y bug" handling. "Y" is the only font6 letter of width 5, so once five
        // columns have been read the letter is pushed even without a blank separator.
        if height == 6 && letter_width == 5 {
            ids.push(id);
            id = 0;
            letter_width = 0;
        }

        id = (id << height) | column;
        letter_width += 1;
    }

    // Flush a letter that runs up to the right edge of the image.
    if id != 0 {
        ids.push(id);
    }
    Some(ids)
}
117
118fn get_letter_map() -> HashMap<u64, char> {
119    let (letters6, letter_forms6) = FONT6
120        .split_once("\n\n")
121        .unwrap();
122
123    let (letters10, letter_forms10) = FONT10
124        .split_once("\n\n")
125        .unwrap();
126
127    let mut letter_map = HashMap::new();
128
129    populate_letter_map(&mut letter_map, letter_forms6, letters6);
130    populate_letter_map(&mut letter_map, letter_forms10, letters10);
131
132    letter_map
133}
134
135fn populate_letter_map(
136    letter_map: &mut HashMap<u64, char>, 
137    letter_forms: &str, 
138    letters: &str
139) {
140    map_to_id(letter_forms.trim())
141        .unwrap()
142        .iter()
143        .zip(letters.chars()) 
144        .for_each(|(&id, c)| { letter_map.insert(id, c); });
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample image in the 6-row font. The rows must start at column 0 because leading
    /// spaces would become part of the image.
    const SIZE6_IMAGE: &str = r"
.##..###...##..####.####..##..#..#.###...##.#..#.#.....##..###..###...###.#..#.#...#.####
#..#.#..#.#..#.#....#....#..#.#..#..#.....#.#.#..#....#..#.#..#.#..#.#....#..#.#...#....#
#..#.###..#....###..###..#....####..#.....#.##...#....#..#.#..#.#..#.#....#..#..#.#....#.
####.#..#.#....#....#....#.##.#..#..#.....#.#.#..#....#..#.###..###...##..#..#...#....#..
#..#.#..#.#..#.#....#....#..#.#..#..#..#..#.#.#..#....#..#.#....#.#.....#.#..#...#...#...
#..#.###...##..####.#.....###.#..#.###..##..#..#.####..##..#....#..#.###...##....#...####
        ";
    const SIZE6_TEXT: &str = "ABCEFGHIJKLOPRSUYZ";

    /// Sample image in the 10-row font, shared by every test that feeds it to `ocr`
    /// through a different `Scannable` input type.
    const SIZE10_IMAGE: &str = r"
..##...#####...####..######.######..####..#....#....###.#....#.#......#....#.#####..#####..#....#.######
.#..#..#....#.#....#.#......#......#....#.#....#.....#..#...#..#......##...#.#....#.#....#.#....#......#
#....#.#....#.#......#......#......#......#....#.....#..#..#...#......##...#.#....#.#....#..#..#.......#
#....#.#....#.#......#......#......#......#....#.....#..#.#....#......#.#..#.#....#.#....#..#..#......#.
#....#.#####..#......#####..#####..#......######.....#..##.....#......#.#..#.#####..#####....##......#..
######.#....#.#......#......#......#..###.#....#.....#..##.....#......#..#.#.#......#..#.....##.....#...
#....#.#....#.#......#......#......#....#.#....#.....#..#.#....#......#..#.#.#......#...#...#..#...#....
#....#.#....#.#......#......#......#....#.#....#.#...#..#..#...#......#...##.#......#...#...#..#..#.....
#....#.#....#.#....#.#......#......#...##.#....#.#...#..#...#..#......#...##.#......#....#.#....#.#.....
#....#.#####...####..######.#.......###.#.#....#..###...#....#.######.#....#.#......#....#.#....#.######
        ";
    const SIZE10_TEXT: &str = "ABCEFGHJKLNPRXZ";

    /// 6-row image in which the "Y" is followed by "H" without a blank column between.
    const Y_BUG_IMAGE: &str = r"
#.....##..#...##..#.###..
#....#..#.#...##..#.#..#.
#....#.....#.#.####.###..
#....#.##...#..#..#.#..#.
#....#..#...#..#..#.#..#.
####..###...#..#..#.###..
        ";
    const Y_BUG_TEXT: &str = "LGYHB";

    /// Asserts that `ocr` reads `expected` from `image`; on failure the actual result is
    /// shown next to the expected one.
    fn assert_ocr<T: Scannable>(expected: &str, image: T) {
        assert_eq!(ocr(image).as_deref(), Some(expected));
    }

    #[test]
    fn size6() {
        assert_ocr(SIZE6_TEXT, SIZE6_IMAGE);
    }

    #[test]
    fn size10() {
        assert_ocr(SIZE10_TEXT, SIZE10_IMAGE);
    }

    #[test]
    fn y_bug() {
        assert_ocr(Y_BUG_TEXT, Y_BUG_IMAGE);
    }

    #[test]
    fn bool_vec() {
        let letter_forms = Y_BUG_IMAGE.trim();
        let width = letter_forms.find('\n').unwrap();
        let bools: Vec<_> = letter_forms.as_bytes().iter()
            .filter(|&&c| c != b'\n')
            .map(|&c| c == b'#')
            .collect();
        assert_ocr(Y_BUG_TEXT, (&bools, width));
    }

    #[test]
    fn char_vec() {
        let letter_forms = SIZE10_IMAGE.trim();
        let width = letter_forms.find('\n').unwrap();
        let chars: Vec<_> = letter_forms.chars()
            .filter(|&c| c != '\n')
            .collect();
        assert_ocr(SIZE10_TEXT, (&chars, width));
    }

    #[test]
    fn vec_vec_char() {
        let chars: Vec<Vec<char>> = SIZE10_IMAGE.trim().lines()
            .map(|line| line.chars().collect())
            .collect();
        assert_ocr(SIZE10_TEXT, &chars);
    }

    #[test]
    fn vec_vec_bool() {
        let bools: Vec<Vec<bool>> = SIZE10_IMAGE.trim().lines()
            .map(|line| line.chars().map(|c| c == '#').collect())
            .collect();
        assert_ocr(SIZE10_TEXT, &bools);
    }

    #[test]
    fn size5_should_fail() {
        // The font6 sample with its last row removed: five rows is not a valid height.
        let letter_forms = r"
.##..###...##..####.####..##..#..#.###...##.#..#.#.....##..###..###...###.#..#.#...#.####
#..#.#..#.#..#.#....#....#..#.#..#..#.....#.#.#..#....#..#.#..#.#..#.#....#..#.#...#....#
#..#.###..#....###..###..#....####..#.....#.##...#....#..#.#..#.#..#.#....#..#..#.#....#.
####.#..#.#....#....#....#.##.#..#..#.....#.#.#..#....#..#.###..###...##..#..#...#....#..
#..#.#..#.#..#.#....#....#..#.#..#..#..#..#.#.#..#....#..#.#....#.#.....#.#..#...#...#...
        ";
        assert_eq!(None, ocr(letter_forms));
    }

    #[test]
    fn not_square_should_fail() {
        // Third row is too short and fourth row is too long.
        let letter_forms = r"
#.....##..#...##..#.###..
#....#..#.#...##..#.#..#.
#....#.....#.#.####.###
#....#.##...#..#..#.#..#...
#....#..#...#..#..#.#..#.
####..###...#..#..#.###..
        ";
        assert_eq!(None, ocr(letter_forms));
    }

    #[test]
    fn rectangular() {
        // Uses '*' for ink and ' ' for blanks. The rows are written as quoted pieces so
        // that their significant trailing spaces cannot be stripped by an editor.
        let letter_forms = concat!(
            "**** *  *   ** **** ***    ** **** ****\n",
            "   * * *     * *    *  *    * *       *\n",
            "  *  **      * ***  ***     * ***    * \n",
            " *   * *     * *    *  *    * *     *  \n",
            "*    * *  *  * *    *  * *  * *    *   \n",
            "**** *  *  **  *    ***   **  *    ****",
        );
        assert_ocr("ZKJFBJFZ", letter_forms);
    }
}