Skip to main content

pixelsrc/
tokenizer.rs

//! Token extraction from grid strings
2
/// A warning generated during tokenization
///
/// Warnings are non-fatal: they describe input that was skipped or could not
/// be turned into a token, and are returned alongside whatever tokens were
/// successfully extracted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Warning {
    /// Human-readable description of the problem.
    pub message: String,
}

impl Warning {
    /// Creates a warning from anything convertible into a `String`
    /// (for example a `&str` literal or the result of `format!`).
    pub fn new(message: impl Into<String>) -> Self {
        Self { message: message.into() }
    }
}
14
15/// Extracts tokens from a grid row string.
16///
17/// Tokens are of the form `{name}` where name can contain any characters
18/// except `}`. Characters outside of tokens generate warnings.
19///
20/// # Examples
21///
22/// ```
23/// use pixelsrc::tokenizer::tokenize;
24///
25/// let (tokens, warnings) = tokenize("{a}{b}{c}");
26/// assert_eq!(tokens, vec!["{a}", "{b}", "{c}"]);
27/// assert!(warnings.is_empty());
28///
29/// let (tokens, warnings) = tokenize("x{a}y");
30/// assert_eq!(tokens, vec!["{a}"]);
31/// assert_eq!(warnings.len(), 2); // warnings for 'x' and 'y'
32/// ```
33pub fn tokenize(row: &str) -> (Vec<String>, Vec<Warning>) {
34    let mut tokens = Vec::new();
35    let mut warnings = Vec::new();
36    let mut chars = row.chars().peekable();
37
38    while let Some(c) = chars.next() {
39        if c == '{' {
40            // Start of a token
41            let mut token = String::from("{");
42            let mut closed = false;
43
44            for inner in chars.by_ref() {
45                token.push(inner);
46                if inner == '}' {
47                    closed = true;
48                    break;
49                }
50            }
51
52            if closed {
53                tokens.push(token);
54            } else {
55                // Unclosed token
56                warnings.push(Warning::new(format!("Unclosed token '{}' in grid row", token)));
57            }
58        } else {
59            // Character outside token
60            warnings.push(Warning::new(format!("Unexpected character '{}' in grid row", c)));
61        }
62    }
63
64    (tokens, warnings)
65}
66
/// Unit tests for [`tokenize`], covering well-formed rows, stray characters,
/// unclosed tokens, and brace/Unicode edge cases.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_tokens() {
        let (tokens, warnings) = tokenize("{a}{b}{c}");
        assert_eq!(tokens, vec!["{a}", "{b}", "{c}"]);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_extra_characters() {
        let (tokens, warnings) = tokenize("x{a}y");
        assert_eq!(tokens, vec!["{a}"]);
        assert_eq!(warnings.len(), 2);
        assert!(warnings[0].message.contains("'x'"));
        assert!(warnings[1].message.contains("'y'"));
    }

    #[test]
    fn test_unclosed_token() {
        let (tokens, warnings) = tokenize("{unclosed");
        assert!(tokens.is_empty());
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Unclosed"));
    }

    #[test]
    fn test_unclosed_after_valid_token() {
        // Tokens closed before the dangling `{` must still be returned.
        let (tokens, warnings) = tokenize("{a}{b");
        assert_eq!(tokens, vec!["{a}"]);
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'{b'"));
    }

    #[test]
    fn test_empty_string() {
        let (tokens, warnings) = tokenize("");
        assert!(tokens.is_empty());
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_longer_token_names() {
        let (tokens, warnings) = tokenize("{_}{skin}{_}");
        assert_eq!(tokens, vec!["{_}", "{skin}", "{_}"]);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_complex_token_names() {
        let (tokens, warnings) = tokenize("{long_name}{x}");
        assert_eq!(tokens, vec!["{long_name}", "{x}"]);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_multiple_extra_chars() {
        let (tokens, warnings) = tokenize("abc{x}def{x}ghi");
        assert_eq!(tokens, vec!["{x}", "{x}"]);
        assert_eq!(warnings.len(), 9); // a,b,c,d,e,f,g,h,i
    }

    #[test]
    fn test_stray_closing_brace() {
        // A `}` with no opening `{` is just another unexpected character.
        let (tokens, warnings) = tokenize("}");
        assert!(tokens.is_empty());
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'}'"));
    }

    #[test]
    fn test_multibyte_characters() {
        // Non-ASCII characters are handled per `char`, not per byte.
        let (tokens, warnings) = tokenize("é{ñ}");
        assert_eq!(tokens, vec!["{ñ}"]);
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'é'"));
    }

    #[test]
    fn test_open_brace_inside_name() {
        // Names may contain anything except `}`, including `{`.
        let (tokens, warnings) = tokenize("{a{b}");
        assert_eq!(tokens, vec!["{a{b}"]);
        assert!(warnings.is_empty());
    }
}