garnish_lang_simple_data/data/
parsing.rs1use crate::data::SimpleNumber;
2use crate::DataError;
3use std::iter;
4use std::str::FromStr;
5
6pub fn parse_char_list(input: &str) -> Result<String, DataError> {
8 let mut new = String::new();
9
10 if input.len() == 0 {
11 return Ok(new);
12 }
13
14 let mut start_quote_count = 0;
15 for c in input.chars() {
16 if c == '"' {
17 start_quote_count += 1;
18 } else {
19 break;
20 }
21 }
22
23 if start_quote_count == input.len() {
24 return Ok(new);
25 }
26
27 let real_len = input.len() - start_quote_count * 2;
28
29 let mut check_escape = false;
30 let mut in_unicode = false;
31 let mut unicode_characters = String::new();
32
33 for c in input.chars().skip(start_quote_count).take(real_len) {
34 if in_unicode {
35 if c == '}' {
36 match parse_number_internal(unicode_characters.as_str(), 16)? {
37 SimpleNumber::Float(_) => Err(DataError::from(format!(
38 "Float numbers are not allowed in Unicode escape. {:?}",
39 unicode_characters
40 )))?,
41 SimpleNumber::Integer(v) => match char::from_u32(v as u32) {
42 None => Err(DataError::from(format!(
43 "Invalid unicode value {:?}. Max is {:?}",
44 unicode_characters,
45 char::MAX.to_digit(16)
46 )))?,
47 Some(v) => {
48 new.push(v);
49 unicode_characters = String::new();
50 }
51 },
52 }
53 in_unicode = false;
54 } else {
55 if c != '{' {
56 unicode_characters.push(c);
57 }
58 }
59
60 continue;
61 }
62
63 if check_escape {
64 match c {
65 'n' => new.push('\n'),
66 't' => new.push('\t'),
67 'r' => new.push('\r'),
68 '0' => new.push('\0'),
69 '\\' => new.push('\\'),
70 '"' => new.push('"'),
71 'u' => in_unicode = true,
72 _ => return Err(DataError::from(format!("Invalid escape character '{}'", c))),
73 }
74
75 check_escape = false;
76 continue;
77 }
78
79 match c {
80 '\\' => check_escape = true,
81 '\n' | '\t' if start_quote_count <= 1 => (), _ => new.push(c),
83 }
84 }
85
86 Ok(new)
87}
88
89pub fn parse_byte_list(input: &str) -> Result<Vec<u8>, DataError> {
91 let mut bytes = vec![];
92
93 let mut start_quote_count = 0;
94 for c in input.chars() {
95 if c == '\'' {
96 start_quote_count += 1;
97 } else {
98 break;
99 }
100 }
101
102 let real_len = input.len() - start_quote_count * 2;
103
104 if start_quote_count >= 2 {
105 parse_byte_list_numbers(&input[start_quote_count..(input.len() - start_quote_count)])
106 } else {
107 let mut check_escape = false;
108 for c in input.chars().skip(start_quote_count).take(real_len) {
109 if check_escape {
110 match c {
111 'n' => bytes.push('\n' as u8),
112 't' => bytes.push('\t' as u8),
113 'r' => bytes.push('\r' as u8),
114 '0' => bytes.push('\0' as u8),
115 '\\' => bytes.push('\\' as u8),
116 '\'' => bytes.push('\'' as u8),
117 _ => return Err(DataError::from(format!("Invalid escape character '{}'", c))),
118 }
119
120 check_escape = false;
121 continue;
122 }
123
124 if c == '\\' {
125 check_escape = true
126 } else {
127 bytes.push(c as u8);
128 }
129 }
130
131 Ok(bytes)
132 }
133}
134
135fn parse_byte_list_numbers(input: &str) -> Result<Vec<u8>, DataError> {
137 let mut current_number = String::new();
138 let mut numbers = vec![];
139
140 for c in input.chars().chain(iter::once(' ')) {
141 if c.is_numeric() || c == '_' {
142 current_number.push(c);
143 } else if c == ' ' && current_number.len() > 0 {
144 match parse_simple_number(current_number.as_str())? {
145 SimpleNumber::Float(_) => Err(DataError::from(format!(
146 "Float numbers are not allowed in ByteLists. {:?}",
147 current_number
148 )))?,
149 SimpleNumber::Integer(v) => {
150 if v < 0 || v > u8::MAX as i32 {
151 Err(DataError::from(format!("Number to large for byte value {:?}", current_number)))?;
152 }
153
154 numbers.push(v as u8);
155 current_number = String::new();
156 }
157 }
158 } else {
159 Err(DataError::from(format!("Invalid character in byte number {:?}", c)))?;
160 }
161 }
162
163 Ok(numbers)
164}
165
166pub fn parse_simple_number(input: &str) -> Result<SimpleNumber, DataError> {
168 parse_number_internal(input, 10)
169}
170
171fn parse_number_internal(input: &str, default_radix: u32) -> Result<SimpleNumber, DataError> {
172 let (radix, input) = match input.find('_') {
173 None => (default_radix, input),
174 Some(i) => {
175 let part = &input[0..i];
176 if part.starts_with("0") {
177 let trimmed = part.trim_matches('0');
178 match u32::from_str(trimmed) {
179 Err(_) => Err(DataError::from(format!("Could not parse radix from {:?}", part)))?,
180 Ok(v) => {
181 if v < 2 || v > 36 {
182 Err(DataError::from(format!("Radix must be with in range [2, 36]. Found {:?}", v)))?
184 } else {
185 (v, &input[i + 1..])
187 }
188 }
189 }
190 } else {
191 (default_radix, input)
192 }
193 }
194 };
195
196 let stripped = input.replace("_", "");
198 match i32::from_str_radix(&stripped, radix) {
199 Ok(v) => Ok(v.into()),
200 Err(_) => {
201 if radix == 10 {
202 match f64::from_str(&stripped) {
203 Ok(v) => Ok(v.into()),
204 Err(_) => Err(DataError::from(format!("Could not create SimpleNumber from string {:?}", input))),
205 }
206 } else {
207 Err(DataError::from(format!("Decimal values only support a radix of 10. Found {:?}", radix)))
208 }
209 }
210 }
211}
212
/// Tests for `parse_simple_number`: integers, floats and radix prefixes.
#[cfg(test)]
mod numbers {
    use crate::data::parse_simple_number;
    use crate::data::SimpleNumber::*;

    // Integers

    #[test]
    fn just_numbers_integer() {
        let parsed = parse_simple_number("123456").unwrap();
        assert_eq!(parsed, Integer(123456));
    }

    #[test]
    fn just_numbers_integer_with_visual_separators() {
        let parsed = parse_simple_number("123_456").unwrap();
        assert_eq!(parsed, Integer(123456));
    }

    #[test]
    fn negative_integer() {
        let parsed = parse_simple_number("-123456").unwrap();
        assert_eq!(parsed, Integer(-123456));
    }

    #[test]
    fn min_integer() {
        let text = i32::MIN.to_string();
        let parsed = parse_simple_number(&text).unwrap();
        assert_eq!(parsed, Integer(i32::MIN));
    }

    #[test]
    fn max_integer() {
        let text = i32::MAX.to_string();
        let parsed = parse_simple_number(&text).unwrap();
        assert_eq!(parsed, Integer(i32::MAX));
    }

    #[test]
    fn just_numbers_integer_err() {
        let outcome = parse_simple_number("123456?");
        assert!(outcome.is_err());
    }

    // Floats

    #[test]
    fn just_numbers_float() {
        let parsed = parse_simple_number("123456.789").unwrap();
        assert_eq!(parsed, Float(123456.789));
    }

    #[test]
    fn just_numbers_float_with_visual_separators() {
        let parsed = parse_simple_number("123.456_789").unwrap();
        assert_eq!(parsed, Float(123.456789));
    }

    #[test]
    fn negative_float() {
        let parsed = parse_simple_number("-123456.789").unwrap();
        assert_eq!(parsed, Float(-123456.789));
    }

    #[test]
    fn just_numbers_float_err() {
        let outcome = parse_simple_number("123456.789?");
        assert!(outcome.is_err());
    }

    // Radix prefixes

    #[test]
    fn just_numbers_base_2() {
        let parsed = parse_simple_number("02_1010101").unwrap();
        assert_eq!(parsed, Integer(0b1010101));
    }

    #[test]
    fn just_numbers_base_36() {
        let parsed = parse_simple_number("036_C7R").unwrap();
        assert_eq!(parsed, Integer(15831));
    }

    #[test]
    fn just_numbers_base_1_is_err() {
        let outcome = parse_simple_number("01_1010101");
        assert!(outcome.is_err());
    }

    #[test]
    fn just_numbers_base_37_is_err() {
        let outcome = parse_simple_number("037_1010101");
        assert!(outcome.is_err());
    }

    #[test]
    fn radix_valid_float_is_err() {
        let outcome = parse_simple_number("02_10101.0101");
        assert!(outcome.is_err());
    }

    #[test]
    fn radix_invalid_float_is_err() {
        let outcome = parse_simple_number("016_A6.789");
        assert!(outcome.is_err());
    }
}
314
/// Tests for `parse_char_list`: quote stripping, whitespace handling and escapes.
#[cfg(test)]
mod char_list {
    use crate::data::parse_char_list;

    /// Asserts that `input` parses successfully to exactly `expected`.
    fn assert_parses_to(input: &str, expected: &str) {
        assert_eq!(parse_char_list(input).unwrap(), expected.to_string());
    }

    /// Asserts that `input` is rejected with an error.
    fn assert_rejected(input: &str) {
        assert!(parse_char_list(input).is_err());
    }

    // Quote handling

    #[test]
    fn true_empty() {
        assert_parses_to("", "");
    }

    #[test]
    fn empty() {
        assert_parses_to("\"\"", "");
    }

    #[test]
    fn empty_multi_quote() {
        assert_parses_to("\"\"\"\"\"\"", "");
    }

    #[test]
    fn skip_starting_and_ending_quotes() {
        assert_parses_to("\"\"\"Some String\"\"\"", "Some String");
    }

    // Raw whitespace

    #[test]
    fn newlines_and_tabs_are_removed_in_single_double_quote() {
        assert_parses_to("\"Some\n\t\t\tString\"", "SomeString");
    }

    #[test]
    fn newlines_and_tabs_are_retained_in_multi_double_quote() {
        assert_parses_to("\"\"Some\n\t\t\tString\"\"", "Some\n\t\t\tString");
    }

    // Escape sequences

    #[test]
    fn convert_newlines() {
        assert_parses_to("Some\\nString", "Some\nString");
    }

    #[test]
    fn convert_unicode() {
        assert_parses_to("Some\\u{25A1}String", "Some\u{25A1}String");
    }

    #[test]
    fn convert_multiple_newlines() {
        assert_parses_to("Some\\n\\nString", "Some\n\nString");
    }

    #[test]
    fn convert_tabs() {
        assert_parses_to("Some\\tString", "Some\tString");
    }

    #[test]
    fn convert_carriage_return() {
        assert_parses_to("Some\\rString", "Some\rString");
    }

    #[test]
    fn convert_null() {
        assert_parses_to("Some\\0String", "Some\0String");
    }

    #[test]
    fn convert_backslash() {
        assert_parses_to("Some\\\\String", "Some\\String");
    }

    #[test]
    fn convert_quote() {
        assert_parses_to("Some\\\"String", "Some\"String");
    }

    // Rejected input

    #[test]
    fn invalid_escape_sequence() {
        assert_rejected("Some\\yString");
    }

    #[test]
    fn invalid_unicode() {
        assert_rejected("Some\\u{FFFFFF}String");
    }
}
415
/// Tests for `parse_byte_list`: character mode (one quote) and number mode
/// (two or more quotes).
#[cfg(test)]
mod byte_list {
    use crate::data::parse_byte_list;

    /// Asserts that `input` parses successfully to exactly `expected`.
    fn assert_bytes(input: &str, expected: &[u8]) {
        assert_eq!(parse_byte_list(input).unwrap(), expected.to_vec());
    }

    /// Asserts that `input` is rejected with an error.
    fn assert_rejected(input: &str) {
        assert!(parse_byte_list(input).is_err());
    }

    // Character mode

    #[test]
    fn skip_starting_and_ending_quotes() {
        assert_bytes("'a'", &[b'a']);
    }

    #[test]
    fn convert_newlines() {
        assert_bytes("'\\n'", &[b'\n']);
    }

    #[test]
    fn convert_tabs() {
        assert_bytes("'\\t'", &[b'\t']);
    }

    #[test]
    fn convert_carriage_return() {
        assert_bytes("'\\r'", &[b'\r']);
    }

    #[test]
    fn convert_null() {
        assert_bytes("'\\0'", &[b'\0']);
    }

    #[test]
    fn convert_backslash() {
        assert_bytes("'\\\\'", &[b'\\']);
    }

    #[test]
    fn convert_quote() {
        assert_bytes("'\\''", &[b'\'']);
    }

    #[test]
    fn invalid_escape_sequence() {
        assert_rejected("'\\y'");
    }

    // Number mode

    #[test]
    fn double_quote_is_series_off_byte_numbers() {
        assert_bytes("''100 150 200 250''", &[100, 150, 200, 250]);
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_radix_two() {
        assert_bytes("''02_1111 02_0101 02_1001''", &[0b1111, 0b0101, 0b1001]);
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_invalid_number() {
        assert_rejected("''abc 150 200 250''");
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_number_to_large() {
        assert_rejected("''100 300 150''");
    }

    #[test]
    fn double_quote_is_series_off_byte_numbers_number_negative() {
        assert_rejected("''100 -150 200''");
    }
}