garnish_lang_simple_data/data/
parsing.rs

1use crate::data::SimpleNumber;
2use crate::DataError;
3use std::iter;
4use std::str::FromStr;
5
6/// Called when building instructions to parse strings into character lists.
7pub fn parse_char_list(input: &str) -> Result<String, DataError> {
8    let mut new = String::new();
9
10    if input.len() == 0 {
11        return Ok(new);
12    }
13
14    let mut start_quote_count = 0;
15    for c in input.chars() {
16        if c == '"' {
17            start_quote_count += 1;
18        } else {
19            break;
20        }
21    }
22
23    if start_quote_count == input.len() {
24        return Ok(new);
25    }
26
27    let real_len = input.len() - start_quote_count * 2;
28
29    let mut check_escape = false;
30    let mut in_unicode = false;
31    let mut unicode_characters = String::new();
32
33    for c in input.chars().skip(start_quote_count).take(real_len) {
34        if in_unicode {
35            if c == '}' {
36                match parse_number_internal(unicode_characters.as_str(), 16)? {
37                    SimpleNumber::Float(_) => Err(DataError::from(format!(
38                        "Float numbers are not allowed in Unicode escape. {:?}",
39                        unicode_characters
40                    )))?,
41                    SimpleNumber::Integer(v) => match char::from_u32(v as u32) {
42                        None => Err(DataError::from(format!(
43                            "Invalid unicode value {:?}. Max is {:?}",
44                            unicode_characters,
45                            char::MAX.to_digit(16)
46                        )))?,
47                        Some(v) => {
48                            new.push(v);
49                            unicode_characters = String::new();
50                        }
51                    },
52                }
53                in_unicode = false;
54            } else {
55                if c != '{' {
56                    unicode_characters.push(c);
57                }
58            }
59
60            continue;
61        }
62
63        if check_escape {
64            match c {
65                'n' => new.push('\n'),
66                't' => new.push('\t'),
67                'r' => new.push('\r'),
68                '0' => new.push('\0'),
69                '\\' => new.push('\\'),
70                '"' => new.push('"'),
71                'u' => in_unicode = true,
72                _ => return Err(DataError::from(format!("Invalid escape character '{}'", c))),
73            }
74
75            check_escape = false;
76            continue;
77        }
78
79        match c {
80            '\\' => check_escape = true,
81            '\n' | '\t' if start_quote_count <= 1 => (), // skip
82            _ => new.push(c),
83        }
84    }
85
86    Ok(new)
87}
88
89/// Called when building instructions to parse strings into byte lists.
90pub fn parse_byte_list(input: &str) -> Result<Vec<u8>, DataError> {
91    let mut bytes = vec![];
92
93    let mut start_quote_count = 0;
94    for c in input.chars() {
95        if c == '\'' {
96            start_quote_count += 1;
97        } else {
98            break;
99        }
100    }
101
102    let real_len = input.len() - start_quote_count * 2;
103
104    if start_quote_count >= 2 {
105        parse_byte_list_numbers(&input[start_quote_count..(input.len() - start_quote_count)])
106    } else {
107        let mut check_escape = false;
108        for c in input.chars().skip(start_quote_count).take(real_len) {
109            if check_escape {
110                match c {
111                    'n' => bytes.push('\n' as u8),
112                    't' => bytes.push('\t' as u8),
113                    'r' => bytes.push('\r' as u8),
114                    '0' => bytes.push('\0' as u8),
115                    '\\' => bytes.push('\\' as u8),
116                    '\'' => bytes.push('\'' as u8),
117                    _ => return Err(DataError::from(format!("Invalid escape character '{}'", c))),
118                }
119
120                check_escape = false;
121                continue;
122            }
123
124            if c == '\\' {
125                check_escape = true
126            } else {
127                bytes.push(c as u8);
128            }
129        }
130
131        Ok(bytes)
132    }
133}
134
135/// Called when building instructions to parse strings into byte lists.
136fn parse_byte_list_numbers(input: &str) -> Result<Vec<u8>, DataError> {
137    let mut current_number = String::new();
138    let mut numbers = vec![];
139
140    for c in input.chars().chain(iter::once(' ')) {
141        if c.is_numeric() || c == '_' {
142            current_number.push(c);
143        } else if c == ' ' && current_number.len() > 0 {
144            match parse_simple_number(current_number.as_str())? {
145                SimpleNumber::Float(_) => Err(DataError::from(format!(
146                    "Float numbers are not allowed in ByteLists. {:?}",
147                    current_number
148                )))?,
149                SimpleNumber::Integer(v) => {
150                    if v < 0 || v > u8::MAX as i32 {
151                        Err(DataError::from(format!("Number to large for byte value {:?}", current_number)))?;
152                    }
153
154                    numbers.push(v as u8);
155                    current_number = String::new();
156                }
157            }
158        } else {
159            Err(DataError::from(format!("Invalid character in byte number {:?}", c)))?;
160        }
161    }
162
163    Ok(numbers)
164}
165
166/// Called when building instructions to parse strings into [`SimpleNumber`].
167pub fn parse_simple_number(input: &str) -> Result<SimpleNumber, DataError> {
168    parse_number_internal(input, 10)
169}
170
171fn parse_number_internal(input: &str, default_radix: u32) -> Result<SimpleNumber, DataError> {
172    let (radix, input) = match input.find('_') {
173        None => (default_radix, input),
174        Some(i) => {
175            let part = &input[0..i];
176            if part.starts_with("0") {
177                let trimmed = part.trim_matches('0');
178                match u32::from_str(trimmed) {
179                    Err(_) => Err(DataError::from(format!("Could not parse radix from {:?}", part)))?,
180                    Ok(v) => {
181                        if v < 2 || v > 36 {
182                            // limit of Rust from_str_radix function below
183                            Err(DataError::from(format!("Radix must be with in range [2, 36]. Found {:?}", v)))?
184                        } else {
185                            // + 1 to skip the underscore
186                            (v, &input[i + 1..])
187                        }
188                    }
189                }
190            } else {
191                (default_radix, input)
192            }
193        }
194    };
195
196    // consider remaining underscores visual separators and replace with empty
197    let stripped = input.replace("_", "");
198    match i32::from_str_radix(&stripped, radix) {
199        Ok(v) => Ok(v.into()),
200        Err(_) => {
201            if radix == 10 {
202                match f64::from_str(&stripped) {
203                    Ok(v) => Ok(v.into()),
204                    Err(_) => Err(DataError::from(format!("Could not create SimpleNumber from string {:?}", input))),
205                }
206            } else {
207                Err(DataError::from(format!("Decimal values only support a radix of 10. Found {:?}", radix)))
208            }
209        }
210    }
211}
212
/// Tests for `parse_simple_number`: plain integers and floats, visual separators, integer
/// limits, and radix prefixes.
#[cfg(test)]
mod numbers {
    use crate::data::parse_simple_number;
    use crate::data::SimpleNumber::*;

    #[test]
    fn just_numbers_integer() {
        assert_eq!(parse_simple_number("123456").unwrap(), Integer(123456));
    }

    #[test]
    fn just_numbers_integer_with_visual_separators() {
        assert_eq!(parse_simple_number("123_456").unwrap(), Integer(123456));
    }

    #[test]
    fn negative_integer() {
        assert_eq!(parse_simple_number("-123456").unwrap(), Integer(-123456));
    }

    #[test]
    fn min_integer() {
        let parsed = parse_simple_number(&i32::MIN.to_string()).unwrap();
        assert_eq!(parsed, Integer(i32::MIN));
    }

    #[test]
    fn max_integer() {
        let parsed = parse_simple_number(&i32::MAX.to_string()).unwrap();
        assert_eq!(parsed, Integer(i32::MAX));
    }

    #[test]
    fn just_numbers_integer_err() {
        assert!(parse_simple_number("123456?").is_err());
    }

    #[test]
    fn just_numbers_float() {
        assert_eq!(parse_simple_number("123456.789").unwrap(), Float(123456.789));
    }

    #[test]
    fn just_numbers_float_with_visual_separators() {
        assert_eq!(parse_simple_number("123.456_789").unwrap(), Float(123.456789));
    }

    #[test]
    fn negative_float() {
        assert_eq!(parse_simple_number("-123456.789").unwrap(), Float(-123456.789));
    }

    #[test]
    fn just_numbers_float_err() {
        assert!(parse_simple_number("123456.789?").is_err());
    }

    #[test]
    fn just_numbers_base_2() {
        assert_eq!(parse_simple_number("02_1010101").unwrap(), Integer(0b1010101));
    }

    #[test]
    fn just_numbers_base_36() {
        assert_eq!(parse_simple_number("036_C7R").unwrap(), Integer(15831));
    }

    #[test]
    fn just_numbers_base_1_is_err() {
        assert!(parse_simple_number("01_1010101").is_err());
    }

    #[test]
    fn just_numbers_base_37_is_err() {
        assert!(parse_simple_number("037_1010101").is_err());
    }

    #[test]
    fn radix_valid_float_is_err() {
        assert!(parse_simple_number("02_10101.0101").is_err());
    }

    #[test]
    fn radix_invalid_float_is_err() {
        assert!(parse_simple_number("016_A6.789").is_err());
    }
}
314
/// Tests for `parse_char_list`: quote stripping, literal whitespace handling, escape
/// sequences, and invalid escapes.
#[cfg(test)]
mod char_list {
    use crate::data::parse_char_list;

    #[test]
    fn true_empty() {
        assert_eq!(parse_char_list("").unwrap(), "");
    }

    #[test]
    fn empty() {
        assert_eq!(parse_char_list("\"\"").unwrap(), "");
    }

    #[test]
    fn empty_multi_quote() {
        assert_eq!(parse_char_list("\"\"\"\"\"\"").unwrap(), "");
    }

    #[test]
    fn skip_starting_and_ending_quotes() {
        assert_eq!(parse_char_list("\"\"\"Some String\"\"\"").unwrap(), "Some String");
    }

    #[test]
    fn newlines_and_tabs_are_removed_in_single_double_quote() {
        assert_eq!(parse_char_list("\"Some\n\t\t\tString\"").unwrap(), "SomeString");
    }

    #[test]
    fn newlines_and_tabs_are_retained_in_multi_double_quote() {
        assert_eq!(parse_char_list("\"\"Some\n\t\t\tString\"\"").unwrap(), "Some\n\t\t\tString");
    }

    #[test]
    fn convert_newlines() {
        assert_eq!(parse_char_list("Some\\nString").unwrap(), "Some\nString");
    }

    #[test]
    fn convert_unicode() {
        assert_eq!(parse_char_list("Some\\u{25A1}String").unwrap(), "Some\u{25A1}String");
    }

    #[test]
    fn convert_multiple_newlines() {
        assert_eq!(parse_char_list("Some\\n\\nString").unwrap(), "Some\n\nString");
    }

    #[test]
    fn convert_tabs() {
        assert_eq!(parse_char_list("Some\\tString").unwrap(), "Some\tString");
    }

    #[test]
    fn convert_carriage_return() {
        assert_eq!(parse_char_list("Some\\rString").unwrap(), "Some\rString");
    }

    #[test]
    fn convert_null() {
        assert_eq!(parse_char_list("Some\\0String").unwrap(), "Some\0String");
    }

    #[test]
    fn convert_backslash() {
        assert_eq!(parse_char_list("Some\\\\String").unwrap(), "Some\\String");
    }

    #[test]
    fn convert_quote() {
        assert_eq!(parse_char_list("Some\\\"String").unwrap(), "Some\"String");
    }

    #[test]
    fn invalid_escape_sequence() {
        assert!(parse_char_list("Some\\yString").is_err());
    }

    #[test]
    fn invalid_unicode() {
        assert!(parse_char_list("Some\\u{FFFFFF}String").is_err());
    }
}
415
/// Tests for `parse_byte_list`: the single-quote character form with its escapes, and the
/// double-quote form holding a series of byte numbers.
#[cfg(test)]
mod byte_list {
    use crate::data::parse_byte_list;

    #[test]
    fn skip_starting_and_ending_quotes() {
        assert_eq!(parse_byte_list("'a'").unwrap(), vec![b'a']);
    }

    #[test]
    fn convert_newlines() {
        assert_eq!(parse_byte_list("'\\n'").unwrap(), vec![b'\n']);
    }

    #[test]
    fn convert_tabs() {
        assert_eq!(parse_byte_list("'\\t'").unwrap(), vec![b'\t']);
    }

    #[test]
    fn convert_carriage_return() {
        assert_eq!(parse_byte_list("'\\r'").unwrap(), vec![b'\r']);
    }

    #[test]
    fn convert_null() {
        assert_eq!(parse_byte_list("'\\0'").unwrap(), vec![b'\0']);
    }

    #[test]
    fn convert_backslash() {
        assert_eq!(parse_byte_list("'\\\\'").unwrap(), vec![b'\\']);
    }

    #[test]
    fn convert_quote() {
        assert_eq!(parse_byte_list("'\\''").unwrap(), vec![b'\'']);
    }

    #[test]
    fn invalid_escape_sequence() {
        assert!(parse_byte_list("'\\y'").is_err());
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers() {
        let parsed = parse_byte_list("''100 150 200 250''").unwrap();
        assert_eq!(parsed, vec![100, 150, 200, 250]);
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_radix_two() {
        let parsed = parse_byte_list("''02_1111 02_0101 02_1001''").unwrap();
        assert_eq!(parsed, vec![0b1111, 0b0101, 0b1001]);
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_invalid_number() {
        assert!(parse_byte_list("''abc 150 200 250''").is_err());
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_number_to_large() {
        assert!(parse_byte_list("''100 300 150''").is_err());
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_number_negative() {
        assert!(parse_byte_list("''100 -150 200''").is_err());
    }
}
497}