yapg/
charsets.rs

1use std::convert::{Into, TryFrom};
2use std::io;
3
/// The 26 lower-case Latin letters, `a` through `z`, in alphabetical order.
pub static CHARSET_ALPHA_LOWER: [char; 26] = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
];
9
/// The 26 upper-case Latin letters, `A` through `Z`, in alphabetical order.
pub static CHARSET_ALPHA_UPPER: [char; 26] = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
];
15
/// The ten decimal digits, `0` through `9`, in ascending order.
pub static CHARSET_NUMERIC: [char; 10] = [
    '0', '1', '2', '3', '4',
    '5', '6', '7', '8', '9',
];
19
/// Punctuation commonly found in prose: `.`, `:`, `,`, `;`, `!`, `?`,
/// the space character, `'`, and `"`.
pub static CHARSET_PROSE: [char; 9] = [
    '.', ':', ',', ';', '!', '?', // sentence punctuation
    ' ', // blank
    '\'', '"', // quotes
];
23
/// Mathematical operators and comparison signs:
/// `+`, `-`, `*`, `/`, `=`, `<`, and `>`.
pub static CHARSET_MATHOPS: [char; 7] = [
    '+', '-', '*', '/', // arithmetic
    '=', '<', '>', // comparison
];
26
/// Bracketing delimiters, each opening character followed by its closing
/// counterpart: `(`, `)`, `[`, `]`, `{`, and `}`.
pub static CHARSET_DELIM: [char; 6] = [
    '(', ')',
    '[', ']',
    '{', '}',
];
29
/// The remaining special characters that belong to none of the other tables:
/// `#`, `@`, `$`, `%`, `&`, `|`, `\`, `~`, `^`, `_`, and `` ` ``.
pub static CHARSET_MISC_SPECIAL: [char; 11] = [
    '#', '@', '$', '%', '&', '|',
    '\\', '~', '^', '_', '`',
];
34
35// total specials: 9 + 7 + 6 + 11 = 33
36// ----------------------- intermediaries for user IO ----------------------- //
/// Translation layer between chars (e.g. for cli flags) and the actual
/// character sets.
///
/// A `char` is translated with `CharsetName::try_from(c)` (via the
/// `TryFrom<char>` implementation); chars without an associated charset are
/// rejected with an error. The "atomic" charsets are:
///
/// | CharsetName   | associated char | contained chars                                            |
/// | ------------- | --------------- | ---------------------------------------------------------- |
/// | `AlphaLower`  | `'L'`           | matching regex `[a-z]`                                     |
/// | `AlphaUpper`  | `'U'`           | matching regex `[A-Z]`                                     |
/// | `Numeric`     | `'N'`           | matching regex `[0-9]`                                     |
/// | `Mathops`     | `'M'`           | `+`, `-`, `*`, `/`, `=`, `<`, `>`                          |
/// | `Prose`       | `'P'`           | `.`, `,`, `:`, `;`, `!`, `?`, `'`, `"`, and the space      |
/// | `Delim`       | `'D'`           | `(`, `)`, `{`, `}`, `[`, `]`                               |
/// | `MiscSpecial` | `'X'`           | `#`, `@`, `$`, `%`, `&`, `\|`, `\`, `~`, `^`, `_`, `` ` `` |
///
/// For convenience, there are also some charsets built from the "atomic"
/// charsets shown above:
///
/// | CharsetName | associated char | contained charsets                         |
/// | ----------- | --------------- | ------------------------------------------ |
/// | `Alpha`     | `'A'`           | `AlphaLower`, `AlphaUpper`                 |
/// | `Special`   | `'S'`           | `Mathops`, `Prose`, `Delim`, `MiscSpecial` |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CharsetName {
    // atomic
    /// Lower-case latin letters (`'L'`).
    AlphaLower,
    /// Upper-case latin letters (`'U'`).
    AlphaUpper,
    /// Decimal digits (`'N'`).
    Numeric,
    /// Mathematical operators and comparison signs (`'M'`).
    Mathops,
    /// Prose punctuation, quotes and the space character (`'P'`).
    Prose,
    /// Parentheses, brackets and braces (`'D'`).
    Delim,
    /// All remaining special characters (`'X'`).
    MiscSpecial,
    // compound
    /// `AlphaLower` and `AlphaUpper` combined (`'A'`).
    Alpha,
    /// `Mathops`, `Prose`, `Delim` and `MiscSpecial` combined (`'S'`).
    Special,
}
73
74impl TryFrom<char> for CharsetName {
75    type Error = io::Error;
76
77    fn try_from(c: char) -> io::Result<Self> {
78        match c {
79            // atomic
80            'U' => Ok(Self::AlphaUpper),
81            'L' => Ok(Self::AlphaLower),
82            'N' => Ok(Self::Numeric),
83            'M' => Ok(Self::Mathops),
84            'P' => Ok(Self::Prose),
85            'D' => Ok(Self::Delim),
86            'X' => Ok(Self::MiscSpecial),
87            // compound
88            'A' => Ok(Self::Alpha),
89            'S' => Ok(Self::Special),
90            // invalid input
91            _ => Err(io::Error::new(
92                io::ErrorKind::InvalidInput,
93                format!("Invalid character set abbreviation: {}", c),
94            )),
95        }
96    }
97}
98
// TODO: implement as bitflags, with methods for AND/OR
/// Represents a specification for a charset.
///
/// Any of the predefined `CharsetName`s can be toggled and additional
/// characters may be included.
/// For this purpose, `CharsetSpec` implements `AddAssign<CharsetName>` and
/// `SubAssign<CharsetName>`, as well as `AddAssign<char>` and
/// `AddAssign<&str>` for individual characters.
/// Alternatively, you can parse a string containing the corresponding chars.
///
/// Building the charset with `construct` consumes the specification; clone
/// it first if it is still needed afterwards.
///
/// # Example
///
/// ```
/// let mut spec = yapg::CharsetSpec::empty();
/// spec += yapg::CharsetName::Numeric; // Adding a named charset
/// spec += "+-*"; // Adding a string
/// spec += '/'; // Adding a single char
///
/// assert_eq!(spec.construct().as_slice(), [
///     '*', '+', '-', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
/// ]);
/// ```
#[derive(Debug, Clone)]
pub struct CharsetSpec {
    /// Include `CHARSET_ALPHA_LOWER`.
    alpha_lower: bool,
    /// Include `CHARSET_ALPHA_UPPER`.
    alpha_upper: bool,
    /// Include `CHARSET_NUMERIC`.
    numeric: bool,
    /// Include `CHARSET_MATHOPS`.
    mathops: bool,
    /// Include `CHARSET_PROSE`.
    prose: bool,
    /// Include `CHARSET_DELIM`.
    delim: bool,
    /// Include `CHARSET_MISC_SPECIAL`.
    misc_special: bool,
    /// Individually added characters; may contain duplicates, which are
    /// removed when the charset is constructed.
    additions: Vec<char>,
}
131
132impl CharsetSpec {
133    /// Builds the actual character set in form of a `Vec<char>`, which is
134    /// sorted and deduplicated.
135    pub fn construct(mut self) -> Vec<char> {
136        let mut set = vec![];
137        if self.alpha_lower {
138            set.append(&mut CHARSET_ALPHA_LOWER.to_vec());
139        }
140        if self.alpha_upper {
141            set.append(&mut CHARSET_ALPHA_UPPER.to_vec());
142        }
143        if self.numeric {
144            set.append(&mut CHARSET_NUMERIC.to_vec());
145        }
146        if self.mathops {
147            set.append(&mut CHARSET_MATHOPS.to_vec());
148        }
149        if self.prose {
150            set.append(&mut CHARSET_PROSE.to_vec());
151        }
152        if self.delim {
153            set.append(&mut CHARSET_DELIM.to_vec());
154        }
155        if self.misc_special {
156            set.append(&mut CHARSET_MISC_SPECIAL.to_vec());
157        }
158        set.append(&mut self.additions);
159        set.sort();
160        set.dedup();
161        set
162    }
163
164    /// Creates the specification for an empty charset.
165    ///
166    /// # Example
167    /// ```
168    /// let charset = yapg::CharsetSpec::empty().construct();
169    /// assert_eq!(charset.len(), 0);
170    /// ```
171    pub fn empty() -> Self {
172        Self {
173            alpha_lower: false,
174            alpha_upper: false,
175            numeric: false,
176            mathops: false,
177            prose: false,
178            delim: false,
179            misc_special: false,
180            additions: vec![],
181        }
182    }
183
184    /// Creates the specification for a standard charset, including all
185    /// alphanumerics, `-` and `_`.
186    /// Should be safe to use in most places, except for strict
187    /// "no-special-characters"-policies or where neither `-` nor `_` are
188    /// considered to be special, yet special chars are required.
189    ///
190    /// # Example
191    /// ```
192    /// let charset = yapg::CharsetSpec::std64().construct();
193    /// assert_eq!(charset.len(), 64);
194    /// ```
195    pub fn std64() -> Self {
196        Self {
197            alpha_lower: true,
198            alpha_upper: true,
199            numeric: true,
200            mathops: false,
201            prose: false,
202            delim: false,
203            misc_special: false,
204            additions: vec!['-', '_'],
205        }
206    }
207
208    /// Creates the specification for charset that contains all printable ASCII
209    /// characters.
210    ///
211    /// # Example
212    /// ```
213    /// let charset = yapg::CharsetSpec::printable_ascii().construct();
214    /// assert_eq!(charset.len(), 95);
215    /// ```
216    pub fn printable_ascii() -> Self {
217        Self {
218            alpha_lower: true,
219            alpha_upper: true,
220            numeric: true,
221            mathops: true,
222            prose: true,
223            delim: true,
224            misc_special: true,
225            additions: vec![],
226        }
227    }
228}
229
230impl std::str::FromStr for CharsetSpec {
231    type Err = io::Error;
232
233    fn from_str(s: &str) -> io::Result<Self> {
234        let mut spec = Self::empty();
235        for c in s.chars() {
236            let name = CharsetName::try_from(c)?;
237            spec += name;
238        }
239        Ok(spec)
240    }
241}
242
243impl Into<Vec<char>> for CharsetSpec {
244    #[inline]
245    fn into(self) -> Vec<char> { self.construct() }
246}
247
248impl std::ops::AddAssign<&str> for CharsetSpec {
249    #[inline]
250    fn add_assign(&mut self, more: &str) {
251        for c in more.chars() {
252            (*self) += c;
253        }
254    }
255}
256
257impl std::ops::AddAssign<char> for CharsetSpec {
258    #[inline]
259    fn add_assign(&mut self, c: char) { self.additions.push(c); }
260}
261
262impl std::ops::AddAssign<CharsetName> for CharsetSpec {
263    fn add_assign(&mut self, name: CharsetName) {
264        match name {
265            // atomic
266            CharsetName::AlphaLower => self.alpha_lower = true,
267            CharsetName::AlphaUpper => self.alpha_upper = true,
268            CharsetName::Numeric => self.numeric = true,
269            CharsetName::Mathops => self.mathops = true,
270            CharsetName::Prose => self.prose = true,
271            CharsetName::Delim => self.delim = true,
272            CharsetName::MiscSpecial => self.misc_special = true,
273            // compound
274            CharsetName::Alpha => {
275                self.alpha_lower = true;
276                self.alpha_upper = true;
277            },
278            CharsetName::Special => {
279                self.mathops = true;
280                self.prose = true;
281                self.delim = true;
282                self.misc_special = true;
283            },
284        }
285    }
286}
287
288impl std::ops::SubAssign<CharsetName> for CharsetSpec {
289    fn sub_assign(&mut self, name: CharsetName) {
290        match name {
291            // atomic
292            CharsetName::AlphaLower => self.alpha_lower = false,
293            CharsetName::AlphaUpper => self.alpha_upper = false,
294            CharsetName::Numeric => self.numeric = false,
295            CharsetName::Mathops => self.mathops = false,
296            CharsetName::Prose => self.prose = false,
297            CharsetName::Delim => self.delim = false,
298            CharsetName::MiscSpecial => self.misc_special = false,
299            // compound
300            CharsetName::Alpha => {
301                self.alpha_lower = false;
302                self.alpha_upper = false;
303            },
304            CharsetName::Special => {
305                self.mathops = false;
306                self.prose = false;
307                self.delim = false;
308                self.misc_special = false;
309            },
310        }
311    }
312}
313
314// ------------------------------- unit tests ------------------------------- //
#[cfg(test)]
mod tests {
    use std::convert::TryFrom;

    use super::CharsetName::*;
    use super::{CharsetName, CharsetSpec};

    /// Joins the given tables into one sorted vector.
    fn sorted_union(tables: &[&[char]]) -> Vec<char> {
        let mut joined = tables.concat();
        joined.sort();
        joined
    }

    /// Every known abbreviation maps to its variant, anything else fails.
    #[test]
    fn parsing_charset_names() {
        let known = vec![
            // atomic
            ('U', AlphaUpper),
            ('L', AlphaLower),
            ('N', Numeric),
            ('M', Mathops),
            ('P', Prose),
            ('D', Delim),
            ('X', MiscSpecial),
            // compound
            ('A', Alpha),
            ('S', Special),
        ];
        for (abbrev, expected) in known {
            assert_eq!(CharsetName::try_from(abbrev).unwrap(), expected);
        }
        // invalid input
        assert!(CharsetName::try_from('Z').is_err());
    }

    /// Parsing abbreviation strings enables exactly the named charsets.
    #[test]
    fn parsing_charset_specs() {
        let alpha = sorted_union(&[
            &super::CHARSET_ALPHA_UPPER[..],
            &super::CHARSET_ALPHA_LOWER[..],
        ]);
        let alnum = sorted_union(&[
            &super::CHARSET_ALPHA_UPPER[..],
            &super::CHARSET_ALPHA_LOWER[..],
            &super::CHARSET_NUMERIC[..],
        ]);

        let parsed_alpha: CharsetSpec = "LU".parse().unwrap();
        assert_eq!(parsed_alpha.construct(), alpha);

        let parsed_alnum: CharsetSpec = "LUN".parse().unwrap();
        assert_eq!(parsed_alnum.construct(), alnum);
    }

    /// Adding a named charset yields its characters in sorted order.
    #[test]
    fn adding_charset_to_spec() {
        let mut spec = CharsetSpec::empty();
        spec += Mathops;
        let expected: Vec<char> = "*+-/<=>".chars().collect();
        assert_eq!(spec.construct(), expected);
    }

    /// Removing named charsets keeps the individually added characters.
    #[test]
    fn subtracting_charset_from_spec() {
        let mut spec = CharsetSpec::std64();
        for name in vec![Alpha, Numeric] {
            spec -= name;
        }
        let expected: Vec<char> = "-_".chars().collect();
        assert_eq!(spec.construct(), expected);
    }

    /// Single characters can be added one by one.
    #[test]
    fn adding_chars_to_spec() {
        let mut spec = CharsetSpec::empty();
        for c in "abcd".chars() {
            spec += c;
        }
        let expected: Vec<char> = "abcd".chars().collect();
        assert_eq!(spec.construct(), expected);
    }

    /// A whole string can be added at once.
    #[test]
    fn adding_strings_to_spec() {
        let mut spec = CharsetSpec::empty();
        spec += "abcd";
        let expected: Vec<char> = "abcd".chars().collect();
        assert_eq!(spec.construct(), expected);
    }
}