gtk_ui_builder/parser/
tokenizer.rs

1use std::collections::VecDeque;
2
3use super::token::Token;
4use super::tokenize_error::TokenizeError;
5
6pub struct Tokenizer;
7
8impl Tokenizer {
9    /// Parse syntax tokens from input string
10    /// 
11    /// ```
12    /// use gtk_ui_builder::parser::prelude::*;
13    /// 
14    /// let tokens = Tokenizer::parse("[a b c]").expect("Failed to tokenize");
15    /// 
16    /// if let Token::SquareBrackets { tokens, .. } = &tokens[0] {
17    ///     let mut list = Vec::new();
18    /// 
19    ///     for token in tokens {
20    ///         if let Token::Other { value, .. } = token {
21    ///             list.push(value);
22    ///         }
23    ///     }
24    /// 
25    ///     println!("Tokenized list: {:?}", list);
26    /// }
27    /// ```
28    pub fn parse<T: ToString>(text: T) -> Result<Vec<Token>, TokenizeError> {
29        let mut tokens = Vec::new();
30        let mut word = String::new();
31
32        let text = text.to_string().chars().collect::<Vec<char>>();
33
34        let mut token_begin = 0;
35        let mut i = 0;
36
37        while i < text.len() {
38            // Other token end
39            if text[i].is_whitespace() {
40                // Two ifs to avoid else execution
41                if !word.is_empty() {
42                    tokens.push(Token::Other {
43                        begin: token_begin,
44                        end: i - 1,
45                        value: word
46                    });
47
48                    word = String::new();
49                }
50
51                token_begin = i + 1;
52            }
53
54            // Parse string
55            else if text[i] == '"' {
56                // Save not empty word as Other token
57                if !word.is_empty() {
58                    tokens.push(Token::Other {
59                        begin: token_begin,
60                        end: i - 1,
61                        value: word
62                    });
63
64                    token_begin = i;
65                    word = String::new();
66                }
67
68                let mut correct_str = false;
69                i += 1;
70
71                while i < text.len() {
72                    // TODO: slashes encoding
73                    if text[i] == '"' {
74                        correct_str = true;
75
76                        break;
77                    }
78
79                    else {
80                        word.push(text[i]);
81                    }
82
83                    i += 1;
84                }
85
86                // Return Err if string end wasn't found ("string)
87                if !correct_str {
88                    return Err(TokenizeError::IncorrectString {
89                        message: format!("Incorrect string format found from offset {} to {}", token_begin, i),
90                        begin: token_begin,
91                        end: i,
92                        wrong_string: word
93                    });
94                }
95
96                tokens.push(Token::Text {
97                    begin: token_begin,
98                    end: i,
99                    value: word
100                });
101
102                word = String::new();
103                token_begin = i + 1;
104            }
105
106            // Parse brackets
107            else if text[i] == '(' || text[i] == '[' || text[i] == '{' {
108                // Save not empty word as Other token
109                if !word.is_empty() {
110                    tokens.push(Token::Other {
111                        begin: token_begin,
112                        end: i - 1,
113                        value: word
114                    });
115
116                    token_begin = i;
117                    word = String::new();
118                }
119
120                let mut brackets_stack = VecDeque::from([text[i]]);
121                let mut correct_order = false;
122
123                i += 1;
124
125                while i < text.len() {
126                    if let Some(rev_bracket) = Self::get_rev_bracket(text[i]) {
127                        if brackets_stack.back() == Some(&rev_bracket) {
128                            brackets_stack.pop_back();
129
130                            if brackets_stack.is_empty() {
131                                correct_order = true;
132
133                                break;
134                            }
135                        }
136
137                        else {
138                            brackets_stack.push_back(text[i]);
139                        }
140                    }
141
142                    word.push(text[i]);
143                    i += 1;
144                }
145
146                // Return Err if brackets end wasn't found ([string)
147                if !correct_order {
148                    return Err(TokenizeError::IncorrectBrackets {
149                        message: format!("Incorrect brackets format found from offset {} to {}", token_begin, i),
150                        begin: token_begin,
151                        end: i,
152                        wrong_string: word
153                    });
154                }
155
156                let sub_tokens = Self::inc_tokens_offsets(Self::parse(word)?, token_begin + 1);
157
158                tokens.push(match &text[token_begin] {
159                    '(' => Token::Parentheses    { begin: token_begin, end: i, tokens: sub_tokens },
160                    '[' => Token::SquareBrackets { begin: token_begin, end: i, tokens: sub_tokens },
161                    '{' => Token::CurlyBrackets  { begin: token_begin, end: i, tokens: sub_tokens },
162                    _ => unreachable!()
163                });
164
165                word = String::new();
166                token_begin = i + 1;
167            }
168
169            // Push Other token character
170            // There may be a situation like {}; where ; will be parsed as Other
171            // while {} as CurlyBrackets, so technically ; will be the first character. This is wrong
172            // so we need check previous character
173            else { /* if Self::is_normal_char(text[i], word.is_empty() && (if i > 0 { text[i - 1].is_whitespace() } else { true })) {*/
174                word.push(text[i]);
175            }
176
177            // Wrong Other token character
178            /*else {
179                return Err(TokenizeError::IncorrectChar {
180                    message: format!("Incorrect character ({}) found at offset {}", text[i], i),
181                    wrong_string: word,
182                    offset: i
183                });
184            }*/
185
186            i += 1;
187        }
188
189        if !word.is_empty() {
190            tokens.push(Token::Other {
191                begin: token_begin,
192                end: text.len() - 1,
193                value: word
194            });
195        }
196
197        Ok(tokens)
198    }
199
200    fn get_rev_bracket(bracket: char) -> Option<char> {
201        match bracket {
202            '(' => Some(')'),
203            '[' => Some(']'),
204            '{' => Some('}'),
205
206            ')' => Some('('),
207            ']' => Some('['),
208            '}' => Some('{'),
209
210            _ => None
211        }
212    }
213
214    /*fn is_normal_char(char: char, first: bool) -> bool {
215        !first ||
216        (char >= 'a' && char <= 'z') ||
217        (char >= 'A' && char <= 'Z') ||
218        (char >= '0' && char <= '9') ||
219        char == '.' || char == '_' || char == '='
220    }*/
221
222    fn inc_tokens_offsets(mut tokens: Vec<Token>, offset: usize) -> Vec<Token> {
223        for token in &mut tokens {
224            match token {
225                Token::Text  { begin, end, .. } => { *begin += offset; *end += offset; },
226                Token::Other { begin, end, .. } => { *begin += offset; *end += offset; },
227
228                Token::Parentheses { begin, end, tokens } => {
229                    *begin += offset;
230                    *end += offset;
231                    *tokens = Self::inc_tokens_offsets(tokens.clone(), offset);
232                },
233
234                Token::SquareBrackets { begin, end, tokens } => {
235                    *begin += offset;
236                    *end += offset;
237                    *tokens = Self::inc_tokens_offsets(tokens.clone(), offset);
238                },
239
240                Token::CurlyBrackets  { begin, end, tokens } => {
241                    *begin += offset;
242                    *end += offset;
243                    *tokens = Self::inc_tokens_offsets(tokens.clone(), offset);
244                }
245            }
246        }
247
248        tokens
249    }
250}