Skip to main content

cobble/validator/
string_reader.rs

/// Brigadier-style string reader for parsing Minecraft commands.
/// Tracks a cursor position and provides methods for reading various token types.
///
/// The input is stored as a vector of `char`s, so the cursor is a character
/// index rather than a byte offset.
///
/// Invariant: `cursor <= input.len()`. Every method that moves the cursor
/// upholds this, which is what keeps the slicing in the read methods in range.
#[derive(Debug, Clone)]
pub struct StringReader {
    input: Vec<char>,
    cursor: usize,
}

impl StringReader {
    /// Creates a reader over `input` with the cursor on the first character.
    pub fn new(input: &str) -> Self {
        Self {
            input: input.chars().collect(),
            cursor: 0,
        }
    }

    /// Returns the current cursor position (a character index).
    pub fn cursor(&self) -> usize {
        self.cursor
    }

    /// Moves the cursor to `pos`.
    ///
    /// Positions past the end of the input are clamped to the end so that the
    /// reader never holds an out-of-range cursor.
    pub fn set_cursor(&mut self, pos: usize) {
        self.cursor = pos.min(self.input.len());
    }

    /// Returns `true` if at least one character is left to read.
    pub fn can_read(&self) -> bool {
        self.cursor < self.input.len()
    }

    /// Returns `true` if at least `n` characters are left to read.
    pub fn can_read_n(&self, n: usize) -> bool {
        // Compare against the remaining length instead of computing
        // `cursor + n`, which could overflow for very large `n`.
        n <= self.remaining_len()
    }

    /// Returns the character under the cursor without consuming it.
    pub fn peek(&self) -> Option<char> {
        self.input.get(self.cursor).copied()
    }

    /// Returns the character `offset` positions after the cursor without
    /// consuming anything, or `None` if that position is past the end.
    pub fn peek_at(&self, offset: usize) -> Option<char> {
        let index = self.cursor.checked_add(offset)?;
        self.input.get(index).copied()
    }

    /// Consumes and returns the character under the cursor, if any.
    pub fn read_char(&mut self) -> Option<char> {
        let ch = self.peek()?;
        self.cursor += 1;
        Some(ch)
    }

    /// Advances the cursor past any run of space characters (`' '` only).
    pub fn skip_whitespace(&mut self) {
        while self.peek() == Some(' ') {
            self.cursor += 1;
        }
    }

    /// Skips spaces and reports whether at least one was skipped.
    pub fn skip_required_whitespace(&mut self) -> bool {
        let start = self.cursor;
        self.skip_whitespace();
        self.cursor > start
    }

    /// Returns everything from the cursor to the end of the input without
    /// consuming it.
    pub fn remaining(&self) -> String {
        self.input[self.cursor..].iter().collect()
    }

    /// Returns the characters in `start..end` (character indices).
    ///
    /// # Panics
    ///
    /// Panics if `start > end` or `end` is past the end of the input.
    pub fn slice(&self, start: usize, end: usize) -> String {
        self.input[start..end].iter().collect()
    }

    /// Returns the number of characters left to read.
    pub fn remaining_len(&self) -> usize {
        self.input.len().saturating_sub(self.cursor)
    }

    /// Check if the next character is a space or end of input (i.e., a token boundary).
    pub fn at_token_boundary(&self) -> bool {
        matches!(self.peek(), None | Some(' '))
    }

    /// Try to read a specific literal followed by end-of-input or a space.
    ///
    /// On success the cursor is moved just past the literal (the following
    /// space is not consumed). On failure the cursor is left unchanged.
    pub fn try_read_literal(&mut self, literal: &str) -> bool {
        let rest = &self.input[self.cursor..];

        let mut matched = 0;
        for expected in literal.chars() {
            if rest.get(matched) != Some(&expected) {
                return false;
            }
            matched += 1;
        }

        // Must be followed by space or end of input, otherwise the literal is
        // only a prefix of a longer token.
        if matches!(rest.get(matched), Some(&next) if next != ' ') {
            return false;
        }

        self.cursor += matched;
        true
    }

    /// Read an unquoted string: [a-zA-Z0-9_:.+\-/#*]
    /// Returns the string read, which may be empty.
    pub fn read_unquoted_string(&mut self) -> String {
        let start = self.cursor;
        let len = self.input[start..]
            .iter()
            .take_while(|&&c| is_unquoted_char(c))
            .count();
        self.cursor = start + len;
        self.input[start..self.cursor].iter().collect()
    }

    /// Read a quoted string ("..." or '...') with escape handling.
    ///
    /// Returns true if a closing quote matching the opening one was found; the
    /// cursor is then just past it. If the input does not start with a quote
    /// the cursor is unchanged; if the string is unterminated the cursor is
    /// left at the end of the input.
    pub fn read_quoted_string(&mut self) -> bool {
        let quote = match self.peek() {
            Some(c) if is_quote(c) => c,
            _ => return false,
        };
        self.cursor += 1;

        while let Some(ch) = self.read_char() {
            if ch == '\\' {
                // Escape: the next character can never terminate the string.
                if self.can_read() {
                    self.cursor += 1;
                }
            } else if ch == quote {
                return true;
            }
        }
        false
    }

    /// Read a string: if it starts with quote, read quoted; otherwise read unquoted.
    ///
    /// Returns false at end of input, for an unterminated quoted string, or
    /// when no unquoted characters could be read.
    pub fn read_string(&mut self) -> bool {
        match self.peek() {
            None => false,
            Some(c) if is_quote(c) => self.read_quoted_string(),
            Some(_) => !self.read_unquoted_string().is_empty(),
        }
    }

    /// Read everything remaining and move the cursor to the end of the input.
    pub fn read_greedy(&mut self) -> String {
        let result = self.remaining();
        self.cursor = self.input.len();
        result
    }

    /// Read an integer (optional sign + digits).
    ///
    /// Returns `None` and leaves the cursor unchanged when there are no digits
    /// or when the value does not fit in an `i64`.
    pub fn read_integer(&mut self) -> Option<i64> {
        let start = self.cursor;
        self.skip_sign();
        if self.skip_digits() == 0 {
            self.cursor = start;
            return None;
        }

        let text: String = self.input[start..self.cursor].iter().collect();
        let parsed = text.parse().ok();
        if parsed.is_none() {
            // Out-of-range literal: do not leave the cursor half-way through
            // a token the caller was told could not be read.
            self.cursor = start;
        }
        parsed
    }

    /// Read a float/double (integer with optional decimal part and/or exponent).
    ///
    /// Accepts forms such as `1`, `-1.5`, `.5`, `1.` and `2e-3`. An `e`/`E`
    /// is only treated as an exponent when digits follow it; otherwise it is
    /// left unread. Returns `None` and leaves the cursor unchanged when no
    /// number could be read.
    pub fn read_float(&mut self) -> Option<f64> {
        let start = self.cursor;
        self.skip_sign();

        let int_digits = self.skip_digits();
        let mut frac_digits = 0;
        if self.peek() == Some('.') {
            self.cursor += 1;
            frac_digits = self.skip_digits();
        }
        // At least one digit is required on either side of the optional dot.
        if int_digits == 0 && frac_digits == 0 {
            self.cursor = start;
            return None;
        }

        // Exponent part
        if matches!(self.peek(), Some('e') | Some('E')) {
            let before_exponent = self.cursor;
            self.cursor += 1;
            self.skip_sign();
            if self.skip_digits() == 0 {
                // Not an exponent after all (e.g. `3east`): give the letter back.
                self.cursor = before_exponent;
            }
        }

        let text: String = self.input[start..self.cursor].iter().collect();
        let parsed = text.parse().ok();
        if parsed.is_none() {
            self.cursor = start;
        }
        parsed
    }

    /// Read balanced braces: {} or []. Used for NBT/JSON.
    ///
    /// Brackets inside quoted strings ("..." or '...') are ignored, and a
    /// backslash inside a string escapes the next character. Returns true once
    /// the opening bracket is closed, with the cursor just past it. Returns
    /// false if the input does not start with `{`/`[` (cursor unchanged), on a
    /// mismatched closer, or if the input ends first; in the latter two cases
    /// the cursor is left where scanning stopped.
    pub fn read_nbt(&mut self) -> bool {
        let first_close = match self.peek() {
            Some('{') => '}',
            Some('[') => ']',
            _ => return false,
        };
        self.cursor += 1;

        // Stack of the closing delimiters we still expect, innermost last.
        let mut stack = vec![first_close];
        let mut quote: Option<char> = None;

        while let Some(ch) = self.read_char() {
            if let Some(active_quote) = quote {
                if ch == '\\' {
                    // Escaped character inside a string: skip it.
                    if self.can_read() {
                        self.cursor += 1;
                    }
                } else if ch == active_quote {
                    quote = None;
                }
                continue;
            }

            match ch {
                '"' | '\'' => quote = Some(ch),
                '{' => stack.push('}'),
                '[' => stack.push(']'),
                '}' | ']' => {
                    if stack.pop() != Some(ch) {
                        return false;
                    }
                    if stack.is_empty() {
                        return true;
                    }
                }
                _ => {}
            }
        }

        false
    }

    /// Read a target selector: `@` plus one ASCII letter (e.g. @a, @s, @p, @e,
    /// @r), optionally followed by `[...]` arguments.
    ///
    /// Inside the argument list, quoted strings ("..." or '...') are skipped
    /// and `{...}` values are delegated to [`StringReader::read_nbt`]. If the
    /// input does not start with `@` + letter the cursor is unchanged; if the
    /// argument list is malformed or unterminated the cursor is left where
    /// scanning stopped.
    pub fn read_selector(&mut self) -> bool {
        if self.peek() != Some('@') {
            return false;
        }
        // Selector type letter
        match self.peek_at(1) {
            Some(c) if c.is_ascii_alphabetic() => self.cursor += 2,
            _ => return false,
        }

        // Optional [...] selector arguments
        if self.peek() != Some('[') {
            return true;
        }

        let mut depth: usize = 0;
        let mut quote: Option<char> = None;
        while let Some(ch) = self.peek() {
            // Nested {} inside selectors (NBT): delegate without consuming the
            // '{', and reject the selector if the NBT itself is malformed.
            if quote.is_none() && ch == '{' {
                if !self.read_nbt() {
                    return false;
                }
                continue;
            }
            self.cursor += 1;

            if let Some(active_quote) = quote {
                if ch == '\\' {
                    if self.can_read() {
                        self.cursor += 1;
                    }
                } else if ch == active_quote {
                    quote = None;
                }
                continue;
            }

            match ch {
                '"' | '\'' => quote = Some(ch),
                '[' => depth += 1,
                ']' => {
                    // The first character scanned is the opening '[', so
                    // `depth` is at least 1 whenever a ']' is seen here.
                    depth -= 1;
                    if depth == 0 {
                        return true;
                    }
                }
                _ => {}
            }
        }
        false
    }

    /// Read a coordinate value: ~, ~N, ^, ^N, a plain number, or a `$(macro)`.
    /// Returns true if a coordinate was read.
    pub fn read_coordinate(&mut self) -> bool {
        // Handle macro placeholder
        if self.try_read_macro() {
            return true;
        }

        match self.peek() {
            None => false,
            Some('~') | Some('^') => {
                self.cursor += 1;
                // Optionally followed by a number (no space between); a bare
                // `~` or `^` is valid on its own.
                let _ = self.read_float();
                true
            }
            // Plain number
            Some(_) => self.read_float().is_some(),
        }
    }

    /// Try to read a $(macro) placeholder. Consumes $( ... ) if found.
    ///
    /// Leaves the cursor unchanged when the input does not start with `$(` or
    /// no closing `)` exists.
    pub fn try_read_macro(&mut self) -> bool {
        let saved = self.cursor;
        if self.peek() != Some('$') || self.peek_at(1) != Some('(') {
            return false;
        }
        self.cursor += 2;

        while let Some(ch) = self.read_char() {
            if ch == ')' {
                return true;
            }
        }
        self.cursor = saved;
        false
    }

    /// Consumes a single leading `+` or `-`, if present.
    fn skip_sign(&mut self) {
        if matches!(self.peek(), Some('+') | Some('-')) {
            self.cursor += 1;
        }
    }

    /// Consumes a run of ASCII digits and returns how many were consumed.
    fn skip_digits(&mut self) -> usize {
        let start = self.cursor;
        while matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
            self.cursor += 1;
        }
        self.cursor - start
    }
}

/// Characters that open (and close) a quoted string.
fn is_quote(c: char) -> bool {
    c == '"' || c == '\''
}

/// Characters allowed in unquoted strings in Minecraft/Brigadier.
fn is_unquoted_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '.' | '/' | '+' | '-' | '#' | '*')
}
403
#[cfg(test)]
mod tests {
    use super::*;

    /// A literal is consumed up to, but not including, the following space.
    #[test]
    fn test_read_literal() {
        let mut reader = StringReader::new("say hello");
        assert!(reader.try_read_literal("say"));
        assert_eq!(reader.cursor(), 3);

        reader.skip_whitespace();
        assert_eq!(reader.remaining(), "hello");
    }

    /// A double-quoted string is consumed through its closing quote.
    #[test]
    fn test_read_quoted_string() {
        let mut reader = StringReader::new("\"hello world\" rest");
        assert!(reader.read_quoted_string());
        assert_eq!(reader.remaining(), " rest");
    }

    /// A selector with an argument list is consumed through the closing `]`.
    #[test]
    fn test_read_selector() {
        let mut reader = StringReader::new("@a[tag=foo,limit=1] rest");
        assert!(reader.read_selector());
        assert_eq!(reader.remaining(), " rest");
    }

    /// Relative (`~`), bare, and local (`^`) coordinates are read one at a time.
    #[test]
    fn test_read_coordinate() {
        let mut reader = StringReader::new("~10 ~ ^-3.5");

        assert!(reader.read_coordinate());
        assert_eq!(reader.remaining(), " ~ ^-3.5");

        reader.skip_whitespace();
        assert!(reader.read_coordinate());
        assert_eq!(reader.remaining(), " ^-3.5");

        reader.skip_whitespace();
        assert!(reader.read_coordinate());
        assert!(reader.remaining().is_empty());
    }

    /// Both compound (`{}`) and list (`[]`) roots are read as balanced NBT.
    #[test]
    fn test_read_nbt() {
        let mut compound = StringReader::new("{key:\"value\",nested:{a:1}} rest");
        assert!(compound.read_nbt());
        assert_eq!(compound.remaining(), " rest");

        let mut list = StringReader::new("[{key:\"value\"}] rest");
        assert!(list.read_nbt());
        assert_eq!(list.remaining(), " rest");
    }

    /// A `[` closed by `}` is rejected.
    #[test]
    fn test_read_nbt_rejects_mismatched_nested_delimiters() {
        let mut reader = StringReader::new("{Items:[{id:\"stone\"}} rest");
        assert!(!reader.read_nbt());
    }

    /// A signed integer is parsed and the cursor stops at the following space.
    #[test]
    fn test_read_integer() {
        let mut reader = StringReader::new("-42 rest");
        assert_eq!(reader.read_integer(), Some(-42));
        assert_eq!(reader.remaining(), " rest");
    }

    /// A decimal number is parsed and the cursor stops at the following space.
    #[test]
    fn test_read_float() {
        let mut reader = StringReader::new("2.5 rest");
        assert_eq!(reader.read_float(), Some(2.5));
        assert_eq!(reader.remaining(), " rest");
    }

    /// A `$(name)` placeholder is consumed through the closing parenthesis.
    #[test]
    fn test_read_macro() {
        let mut reader = StringReader::new("$(arg) rest");
        assert!(reader.try_read_macro());
        assert_eq!(reader.remaining(), " rest");
    }
}