sub_solver/
input.rs

1use std::collections::{HashMap, HashSet};
2
3use unidecode::unidecode;
4
5use crate::{normalize, Word};
6
7const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz";
8
9/// Clean the input string into a consistent format
10/// - Remove all non-alphabetic characters (only keep spaces)
11/// - Convert all characters to lowercase
12/// - Trim leading and trailing whitespace
13/// - Remove duplicate spaces
14/// - Normalize unicode characters
15pub fn clean_input(input: &str) -> String {
16    unidecode(input)
17        .chars()
18        .map(|c| {
19            if c.is_ascii_alphabetic() {
20                c.to_ascii_lowercase()
21            } else {
22                ' '
23            }
24        })
25        // .filter(|c| c.is_ascii_alphabetic() || c == &' ')
26        // .map(|c| c.to_ascii_lowercase())
27        .collect::<String>()
28        .split_whitespace()
29        .collect::<Vec<&str>>()
30        .join(" ")
31}
32
33/// Parse the input string into a vector of `Word`s.
34/// Returns `None` if the input string contains a word that is not possible in the dictionary
35pub fn input_to_words(
36    input: &str,
37    dictionary: &HashMap<String, HashSet<String>>,
38) -> Result<Vec<Word>, String> {
39    let mut result = Vec::new();
40
41    for word in input.split_whitespace() {
42        if let Some(candidates) = dictionary.get(&normalize(word)) {
43            result.push(Word::new(word, candidates));
44        } else {
45            return Err(format!("Word {word:?} is not possible in the dictionary"));
46        }
47    }
48
49    Ok(result)
50}
51
52pub fn parse_key(key: &str) -> Result<HashMap<char, char>, String> {
53    if key.contains('?') {
54        // Key is in wildcard format (example: "b?d?f?????????????????????")
55        let mut result = HashMap::new();
56        for (a, b) in ALPHABET.chars().zip(key.chars()) {
57            if b != '?' {
58                if !ALPHABET.contains(b) {
59                    return Err(format!(
60                        "Invalid key character: {b:?} (should be in lowercase alphabet)"
61                    ));
62                }
63                if let Some((dup_key, value)) = result.iter().find(|(_, v)| **v == b) {
64                    return Err(format!(
65                        "Duplicate mapping of {value:?} to {dup_key:?} and {a:?}"
66                    ));
67                }
68                result.insert(a, b);
69            }
70        }
71        Ok(result)
72    } else {
73        // Key is in delimiter format (example: "a:b,c:d,e:f" or "ab,cd,ef")
74        let mut result = HashMap::new();
75        for pair in key.split(',') {
76            let pair = pair.chars().collect::<Vec<char>>();
77            let (&a, &b) = (
78                pair.first()
79                    .ok_or(format!("No first character in key: {key:?}"))?,
80                pair.last()
81                    .ok_or(format!("No last character in key: {key:?}"))?,
82            );
83
84            if !ALPHABET.contains(a) {
85                return Err(format!(
86                    "Invalid key character: {a:?} (should be in lowercase alphabet)"
87                ));
88            } else if !ALPHABET.contains(b) {
89                return Err(format!(
90                    "Invalid key character: {b:?} (should be in lowercase alphabet)"
91                ));
92            }
93            if result.contains_key(&a) {
94                return Err(format!("Duplicate key character: {a:?}"));
95            }
96            if let Some((dup_key, value)) = result.iter().find(|(_, v)| **v == b) {
97                return Err(format!(
98                    "Duplicate mapping of {value:?} to {dup_key:?} and {a:?}"
99                ));
100            }
101            result.insert(a, b);
102        }
103        Ok(result)
104    }
105}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// The mapping a -> b, c -> d, e -> f that every successful parse below
    /// is expected to produce.
    fn sample_mapping() -> HashMap<char, char> {
        HashMap::from([('a', 'b'), ('c', 'd'), ('e', 'f')])
    }

    /// `clean_input` lowercases, transliterates and collapses separators.
    #[test]
    fn clean_input_tests() {
        let cases = [
            ("Hello, world!", "hello world"),
            ("Hello, world! 123", "hello world"),
            ("  some   spaces   ", "some spaces"),
            ("Oké Måns", "oke mans"),
            ("Æneid", "aeneid"),
            ("test\nword", "test word"),
            ("something.\n\nnow other.", "something now other"),
        ];

        for (raw, expected) in cases {
            assert_eq!(clean_input(raw), expected);
        }
    }

    /// All three accepted spellings of the same key yield the same mapping.
    #[test]
    fn parse_key_tests() {
        let keys = ["a:b,c:d,e:f", "ab,cd,ef", "b?d?f?????????????????????????"];

        for key in keys {
            assert_eq!(parse_key(key).unwrap(), sample_mapping());
        }
    }

    /// Malformed keys are rejected with the exact documented messages.
    #[test]
    fn parse_key_errors() {
        let cases = [
            // Wildcard format
            (
                "????????A???????b???????c?????",
                "Invalid key character: 'A' (should be in lowercase alphabet)",
            ),
            (
                "a???a??????b???c??????????????",
                "Duplicate mapping of 'a' to 'a' and 'e'",
            ),
            // Delimiter format
            (
                "A:b,c:d,e:f",
                "Invalid key character: 'A' (should be in lowercase alphabet)",
            ),
            (
                "a:B,c:d,e:f",
                "Invalid key character: 'B' (should be in lowercase alphabet)",
            ),
            ("ab,cd,af", "Duplicate key character: 'a'"),
            ("ab,cd,eb", "Duplicate mapping of 'b' to 'a' and 'e'"),
        ];

        for (key, message) in cases {
            assert_eq!(parse_key(key).unwrap_err(), message);
        }
    }
}