1use once_cell::sync::Lazy;
7use regex::Regex;
8use serde_json::{Map, Value};
9
10use crate::context::JsonLdContext;
11use crate::error::{Result, ToonError};
12
13static TABULAR_HEADER_REGEX: Lazy<Regex> = Lazy::new(|| {
16 Regex::new(r#"^(@?\w+)\[(\d+)\]\{([^}]+)\}:$"#).expect("TABULAR_HEADER_REGEX is invalid")
17});
18
19static PRIMITIVE_ARRAY_REGEX: Lazy<Regex> = Lazy::new(|| {
22 Regex::new(r#"^(@?\w+)\[(\d+)\]:(.*)$"#).expect("PRIMITIVE_ARRAY_REGEX is invalid")
23});
24
25static KEY_VALUE_REGEX: Lazy<Regex> =
28 Lazy::new(|| Regex::new(r#"^(@?\w+(?::\w+)?):\s*(.*)$"#).expect("KEY_VALUE_REGEX is invalid"));
29
/// State of the line-oriented parser while it walks through one object.
#[derive(Debug, Clone, PartialEq)]
enum ParseMode {
    /// Default state: each line is tried as an array header or a `key: value` pair.
    Indented,
    /// The parser is inside a tabular array block and treats lines as CSV rows.
    Csv {
        /// Column names taken from the `{...}` part of the tabular header.
        fields: Vec<String>,
        /// Number of rows still expected before returning to `Indented`.
        remaining_rows: usize,
    },
}
43
/// Parser that turns TOON text into `serde_json` values.
///
/// Construct it with [`ToonParser::new`] and optionally attach a context with
/// [`ToonParser::with_context`].
#[derive(Debug, Clone)]
pub struct ToonParser {
    /// JSON-LD context carried by the parser and exposed through `context()`.
    context: JsonLdContext,
}
72
73impl Default for ToonParser {
74 fn default() -> Self {
75 Self::new()
76 }
77}
78
79impl ToonParser {
80 pub fn new() -> Self {
90 Self {
91 context: JsonLdContext::new(),
92 }
93 }
94
95 pub fn with_context(mut self, context: JsonLdContext) -> Self {
112 self.context = context;
113 self
114 }
115
116 pub fn context(&self) -> &JsonLdContext {
118 &self.context
119 }
120
121 pub fn parse(&self, input: &str) -> Result<Value> {
142 let lines: Vec<&str> = input.lines().collect();
143 let (value, _) = self.parse_lines(&lines, 0, 0)?;
144 Ok(value)
145 }
146
147 pub fn parse_to_json(&self, input: &str) -> Result<String> {
169 let value = self.parse(input)?;
170 serde_json::to_string_pretty(&value).map_err(|e| e.into())
171 }
172
173 fn parse_lines(
175 &self,
176 lines: &[&str],
177 start: usize,
178 base_indent: usize,
179 ) -> Result<(Value, usize)> {
180 let mut obj = Map::new();
181 let mut i = start;
182 let mut mode = ParseMode::Indented;
183 let mut current_array_key: Option<String> = None;
184 let mut current_array: Vec<Value> = Vec::new();
185
186 while i < lines.len() {
187 let line = lines[i];
188
189 if line.trim().is_empty() {
191 i += 1;
192 continue;
193 }
194
195 let indent = self.get_indent(line);
196
197 if indent < base_indent && !line.trim().is_empty() {
199 break;
200 }
201
202 match &mode {
204 ParseMode::Csv {
205 fields,
206 remaining_rows,
207 } => {
208 if *remaining_rows > 0 {
209 let row_value = self.parse_csv_row(line.trim(), fields, i + 1)?;
211 current_array.push(row_value);
212
213 let new_remaining = remaining_rows - 1;
214 if new_remaining == 0 {
215 if let Some(key) = current_array_key.take() {
217 obj.insert(key, Value::Array(std::mem::take(&mut current_array)));
218 }
219 mode = ParseMode::Indented;
220 } else {
221 mode = ParseMode::Csv {
222 fields: fields.clone(),
223 remaining_rows: new_remaining,
224 };
225 }
226 i += 1;
227 continue;
228 }
229 }
230 ParseMode::Indented => {}
231 }
232
233 let trimmed = line.trim();
234
235 if let Some(caps) = TABULAR_HEADER_REGEX.captures(trimmed) {
237 let key = self.get_capture_str(&caps, 1, i + 1, "tabular array key")?;
238 let count = self.parse_array_count(&caps, 2, i + 1)?;
239 let fields_str = self.get_capture_str(&caps, 3, i + 1, "tabular array fields")?;
240 let fields: Vec<String> = fields_str
241 .split(',')
242 .map(|s| s.trim().to_string())
243 .collect();
244
245 let should_merge = obj.contains_key(&key.to_string());
247
248 current_array_key = Some(key.to_string());
249
250 if should_merge {
252 if let Some(Value::Array(existing)) = obj.get(&key.to_string()) {
253 current_array = existing.clone();
254 current_array.reserve(count);
255 } else {
256 current_array = Vec::with_capacity(count);
257 }
258 } else {
259 current_array = Vec::with_capacity(count);
260 }
261
262 if count > 0 {
263 mode = ParseMode::Csv {
264 fields,
265 remaining_rows: count,
266 };
267 } else if let Some(key) = current_array_key.take() {
268 if !should_merge {
269 obj.insert(key, Value::Array(Vec::new()));
270 }
271 }
272 i += 1;
273 continue;
274 }
275
276 if let Some(caps) = PRIMITIVE_ARRAY_REGEX.captures(trimmed) {
278 let key = self.get_capture_str(&caps, 1, i + 1, "primitive array key")?;
279 let count = self.parse_array_count(&caps, 2, i + 1)?;
280 let inline_values = caps.get(3).map(|m| m.as_str().trim()).unwrap_or("");
281
282 if !inline_values.is_empty() {
283 let values = self.parse_csv_values(inline_values, i + 1)?;
285 obj.insert(key.to_string(), Value::Array(values));
286 } else if count > 0 {
287 let mut arr = Vec::with_capacity(count);
289 for j in 0..count {
290 i += 1;
291 if i < lines.len() {
292 let val_line = lines[i].trim();
293 let parsed = if let Some(stripped) = val_line.strip_prefix("- ") {
294 self.parse_primitive(stripped, i + 1)?
295 } else {
296 self.parse_primitive(val_line, i + 1)?
297 };
298 arr.push(parsed);
299 } else {
300 return Err(ToonError::parse_error(
301 i + 1,
302 format!(
303 "unexpected end of input while parsing array (expected {} more values)",
304 count - j
305 ),
306 ));
307 }
308 }
309 obj.insert(key.to_string(), Value::Array(arr));
310 } else {
311 obj.insert(key.to_string(), Value::Array(Vec::new()));
312 }
313 i += 1;
314 continue;
315 }
316
317 if let Some(caps) = KEY_VALUE_REGEX.captures(trimmed) {
319 let key = self.get_capture_str(&caps, 1, i + 1, "key")?;
320 let value_str = caps.get(2).map(|m| m.as_str().trim()).unwrap_or("");
321
322 if value_str.is_empty() {
323 let (nested, consumed) = self.parse_lines(lines, i + 1, indent + 2)?;
325 obj.insert(key.to_string(), nested);
326 i = consumed;
327 } else {
328 obj.insert(key.to_string(), self.parse_primitive(value_str, i + 1)?);
330 i += 1;
331 }
332 continue;
333 }
334
335 i += 1;
336 }
337
338 if let Some(key) = current_array_key.take() {
340 if !current_array.is_empty() {
341 obj.insert(key, Value::Array(current_array));
342 }
343 }
344
345 Ok((Value::Object(obj), i))
346 }
347
348 fn get_capture_str<'a>(
350 &self,
351 caps: &'a regex::Captures<'a>,
352 group: usize,
353 line: usize,
354 description: &str,
355 ) -> Result<&'a str> {
356 caps.get(group)
357 .map(|m| m.as_str())
358 .ok_or_else(|| ToonError::parse_error(line, format!("missing {}", description)))
359 }
360
361 fn parse_array_count(
363 &self,
364 caps: ®ex::Captures<'_>,
365 group: usize,
366 line: usize,
367 ) -> Result<usize> {
368 let count_str = self.get_capture_str(caps, group, line, "array count")?;
369 count_str.parse::<usize>().map_err(|_| {
370 ToonError::parse_error(line, format!("invalid array count: {}", count_str))
371 })
372 }
373
374 #[inline]
376 fn get_indent(&self, line: &str) -> usize {
377 line.len() - line.trim_start().len()
378 }
379
380 fn parse_csv_row(&self, line: &str, fields: &[String], line_num: usize) -> Result<Value> {
382 let values = self.parse_csv_values(line, line_num)?;
383 let mut obj = Map::new();
384
385 for (i, field) in fields.iter().enumerate() {
386 let value = values.get(i).cloned().unwrap_or(Value::Null);
387 obj.insert(field.clone(), value);
388 }
389
390 Ok(Value::Object(obj))
391 }
392
393 fn parse_csv_values(&self, line: &str, line_num: usize) -> Result<Vec<Value>> {
395 let mut values = Vec::new();
396 let mut current = String::new();
397 let mut in_quotes = false;
398 let mut chars = line.chars().peekable();
399
400 while let Some(c) = chars.next() {
401 if in_quotes {
402 if c == '"' {
403 if chars.peek() == Some(&'"') {
404 current.push('"');
406 chars.next();
407 } else {
408 in_quotes = false;
409 }
410 } else if c == '\\' {
411 if let Some(next) = chars.next() {
413 match next {
414 '"' => current.push('"'),
415 '\\' => current.push('\\'),
416 'n' => current.push('\n'),
417 't' => current.push('\t'),
418 'r' => current.push('\r'),
419 _ => {
420 current.push('\\');
421 current.push(next);
422 }
423 }
424 }
425 } else {
426 current.push(c);
427 }
428 } else if c == '"' {
429 in_quotes = true;
430 } else if c == ',' {
431 values.push(self.parse_primitive(current.trim(), line_num)?);
432 current.clear();
433 } else {
434 current.push(c);
435 }
436 }
437
438 if !current.is_empty() || !values.is_empty() {
440 values.push(self.parse_primitive(current.trim(), line_num)?);
441 }
442
443 Ok(values)
444 }
445
446 fn parse_primitive(&self, s: &str, _line_num: usize) -> Result<Value> {
448 let s = s.trim();
449
450 if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
452 let inner = &s[1..s.len() - 1];
453 return Ok(Value::String(
454 inner.replace("\\\"", "\"").replace("\\\\", "\\"),
455 ));
456 }
457
458 if s == "null" {
460 return Ok(Value::Null);
461 }
462
463 if s == "true" {
465 return Ok(Value::Bool(true));
466 }
467 if s == "false" {
468 return Ok(Value::Bool(false));
469 }
470
471 if let Ok(n) = s.parse::<i64>() {
473 return Ok(Value::Number(n.into()));
474 }
475
476 if let Ok(n) = s.parse::<f64>() {
478 if let Some(num) = serde_json::Number::from_f64(n) {
479 return Ok(Value::Number(num));
480 }
481 }
482
483 Ok(Value::String(s.to_string()))
485 }
486}
487
// Unit tests for `ToonParser`. TOON fixtures use two spaces per nesting level,
// which is what the parser expects for nested objects (`indent + 2`).
#[cfg(test)]
mod tests {
    use super::*;

    /// A new parser starts with an empty context.
    #[test]
    fn test_new_parser() {
        let parser = ToonParser::new();
        assert!(parser.context().is_empty());
    }

    /// `with_context` stores the supplied context.
    #[test]
    fn test_with_context() {
        let mut ctx = JsonLdContext::new();
        ctx.add_prefix("foaf", "http://xmlns.com/foaf/0.1/");

        let parser = ToonParser::new().with_context(ctx);
        assert!(parser.context().has_prefixes());
    }

    /// Scalars: string, integer, float, booleans and null.
    #[test]
    fn test_parse_primitives() {
        let parser = ToonParser::new();

        let toon = r#"
name: Alice
age: 30
score: 3.15
active: true
disabled: false
nothing: null
"#;

        let value = parser.parse(toon).unwrap();
        assert_eq!(value.get("name").unwrap(), "Alice");
        assert_eq!(value.get("age").unwrap(), 30);
        assert_eq!(value.get("score").unwrap(), 3.15);
        assert_eq!(value.get("active").unwrap(), true);
        assert_eq!(value.get("disabled").unwrap(), false);
        assert!(value.get("nothing").unwrap().is_null());
    }

    /// A quoted value keeps its comma and loses the surrounding quotes.
    #[test]
    fn test_parse_quoted_string() {
        let parser = ToonParser::new();

        let toon = r#"message: "Hello, World!""#;
        let value = parser.parse(toon).unwrap();
        assert_eq!(value.get("message").unwrap(), "Hello, World!");
    }

    /// Objects nest by indentation, two spaces per level.
    #[test]
    fn test_parse_nested_object() {
        let parser = ToonParser::new();

        let toon = r#"
person:
  name: Alice
  address:
    city: Seattle
    zip: 98101
"#;

        let value = parser.parse(toon).unwrap();
        let person = value.get("person").unwrap();
        assert_eq!(person.get("name").unwrap(), "Alice");

        let address = person.get("address").unwrap();
        assert_eq!(address.get("city").unwrap(), "Seattle");
        assert_eq!(address.get("zip").unwrap(), 98101);
    }

    /// Inline primitive array: values follow the header on the same line.
    #[test]
    fn test_parse_primitive_array_inline() {
        let parser = ToonParser::new();

        let toon = "tags[3]: rust, wasm, python";
        let value = parser.parse(toon).unwrap();

        let tags = value.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 3);
        assert_eq!(tags[0], "rust");
        assert_eq!(tags[1], "wasm");
        assert_eq!(tags[2], "python");
    }

    /// Multi-line primitive array: one value per following line.
    #[test]
    fn test_parse_primitive_array_multiline() {
        let parser = ToonParser::new();

        let toon = r#"
numbers[3]:
  1
  2
  3
"#;

        let value = parser.parse(toon).unwrap();
        let numbers = value.get("numbers").unwrap().as_array().unwrap();
        assert_eq!(numbers.len(), 3);
        assert_eq!(numbers[0], 1);
        assert_eq!(numbers[1], 2);
        assert_eq!(numbers[2], 3);
    }

    /// A multi-line array that declares more values than the input holds is an error.
    #[test]
    fn test_parse_primitive_array_truncated_is_error() {
        let parser = ToonParser::new();

        let toon = "numbers[3]:\n  1";
        assert!(parser.parse(toon).is_err());
    }

    /// A zero-length primitive array yields an empty array.
    #[test]
    fn test_parse_empty_array() {
        let parser = ToonParser::new();

        let toon = "items[0]:";
        let value = parser.parse(toon).unwrap();

        let items = value.get("items").unwrap().as_array().unwrap();
        assert!(items.is_empty());
    }

    /// A zero-row tabular header yields an empty array.
    #[test]
    fn test_parse_empty_tabular_array() {
        let parser = ToonParser::new();

        let toon = "items[0]{a,b}:";
        let value = parser.parse(toon).unwrap();

        let items = value.get("items").unwrap().as_array().unwrap();
        assert!(items.is_empty());
    }

    /// Tabular rows become objects keyed by the header's field names.
    #[test]
    fn test_parse_tabular_array() {
        let parser = ToonParser::new();

        let toon = r#"
people[2]{name,age}:
  Alice, 30
  Bob, 25
"#;

        let value = parser.parse(toon).unwrap();
        let people = value.get("people").unwrap().as_array().unwrap();
        assert_eq!(people.len(), 2);

        assert_eq!(people[0].get("name").unwrap(), "Alice");
        assert_eq!(people[0].get("age").unwrap(), 30);
        assert_eq!(people[1].get("name").unwrap(), "Bob");
        assert_eq!(people[1].get("age").unwrap(), 25);
    }

    /// `null` cells in tabular rows become JSON null.
    #[test]
    fn test_parse_tabular_array_with_null() {
        let parser = ToonParser::new();

        let toon = r#"
items[2]{a,b,c}:
  1, 2, null
  3, null, 4
"#;

        let value = parser.parse(toon).unwrap();
        let items = value.get("items").unwrap().as_array().unwrap();

        assert_eq!(items[0].get("a").unwrap(), 1);
        assert_eq!(items[0].get("b").unwrap(), 2);
        assert!(items[0].get("c").unwrap().is_null());

        assert_eq!(items[1].get("a").unwrap(), 3);
        assert!(items[1].get("b").unwrap().is_null());
        assert_eq!(items[1].get("c").unwrap(), 4);
    }

    /// Commas inside quoted cells do not split the cell.
    #[test]
    fn test_parse_tabular_array_with_quoted_values() {
        let parser = ToonParser::new();

        let toon = r#"
messages[2]{id,text}:
  1, "Hello, World!"
  2, "Goodbye, World!"
"#;

        let value = parser.parse(toon).unwrap();
        let messages = value.get("messages").unwrap().as_array().unwrap();

        assert_eq!(messages[0].get("id").unwrap(), 1);
        assert_eq!(messages[0].get("text").unwrap(), "Hello, World!");
        assert_eq!(messages[1].get("id").unwrap(), 2);
        assert_eq!(messages[1].get("text").unwrap(), "Goodbye, World!");
    }

    /// Keys starting with `@` are accepted.
    #[test]
    fn test_parse_jsonld_keywords() {
        let parser = ToonParser::new();

        let toon = r#"
@context:
  foaf: http://xmlns.com/foaf/0.1/
@id: http://example.org/alice
@type: Person
"#;

        let value = parser.parse(toon).unwrap();
        assert!(value.get("@context").is_some());
        assert_eq!(value.get("@id").unwrap(), "http://example.org/alice");
        assert_eq!(value.get("@type").unwrap(), "Person");
    }

    /// `parse_to_json` renders the parsed keys and values as JSON text.
    #[test]
    fn test_parse_to_json() {
        let parser = ToonParser::new();

        let toon = "name: Alice\nage: 30";
        let json = parser.parse_to_json(toon).unwrap();

        assert!(json.contains("\"name\""));
        assert!(json.contains("\"Alice\""));
        assert!(json.contains("\"age\""));
        assert!(json.contains("30"));
    }

    /// Backslash-escaped quotes inside a quoted value are decoded.
    #[test]
    fn test_parse_escaped_quotes() {
        let parser = ToonParser::new();

        let toon = r#"message: "She said \"Hello\"""#;
        let value = parser.parse(toon).unwrap();
        assert_eq!(value.get("message").unwrap(), "She said \"Hello\"");
    }

    /// CSV-style doubled quotes inside a quoted cell are decoded.
    #[test]
    fn test_parse_csv_escaped_quotes() {
        let parser = ToonParser::new();

        let toon = r#"
items[1]{name,note}:
  Test, "Said ""Hi"""
"#;

        let value = parser.parse(toon).unwrap();
        let items = value.get("items").unwrap().as_array().unwrap();
        assert_eq!(items[0].get("note").unwrap(), "Said \"Hi\"");
    }

    /// `prefix:name` keys are kept intact.
    #[test]
    fn test_parse_prefixed_keys() {
        let parser = ToonParser::new();

        let toon = "foaf:name: Alice\nschema:age: 30";
        let value = parser.parse(toon).unwrap();

        assert_eq!(value.get("foaf:name").unwrap(), "Alice");
        assert_eq!(value.get("schema:age").unwrap(), 30);
    }

    /// Serializer output for a flat object parses back to the same values.
    #[test]
    fn test_roundtrip_basic() {
        use crate::ToonSerializer;

        let original = serde_json::json!({
            "name": "Alice",
            "age": 30,
            "active": true
        });

        let serializer = ToonSerializer::new();
        let parser = ToonParser::new();

        let toon = serializer.serialize(&original).unwrap();
        let parsed = parser.parse(&toon).unwrap();

        assert_eq!(parsed.get("name").unwrap(), "Alice");
        assert_eq!(parsed.get("age").unwrap(), 30);
        assert_eq!(parsed.get("active").unwrap(), true);
    }

    /// Serializer output for an array of objects parses back to the same rows.
    #[test]
    fn test_roundtrip_tabular_array() {
        use crate::ToonSerializer;

        let original = serde_json::json!({
            "people": [
                {"name": "Alice", "age": 30},
                {"name": "Bob", "age": 25}
            ]
        });

        let serializer = ToonSerializer::new();
        let parser = ToonParser::new();

        let toon = serializer.serialize(&original).unwrap();
        let parsed = parser.parse(&toon).unwrap();

        let people = parsed.get("people").unwrap().as_array().unwrap();
        assert_eq!(people.len(), 2);
        assert_eq!(people[0].get("name").unwrap(), "Alice");
        assert_eq!(people[1].get("name").unwrap(), "Bob");
    }

    /// Two tabular blocks with the same key are merged into one array, and each
    /// row only carries the fields of its own block.
    #[test]
    fn test_parse_multiple_array_blocks_same_key() {
        let parser = ToonParser::new();

        let toon = r#"
@graph[2]{@id,@type,name,age}:
  ex:1, Person, Alice, 30
  ex:2, Person, Bob, 25

@graph[1]{@id,@type,name,industry}:
  ex:3, Organization, ACME, Tech
"#;

        let parsed = parser.parse(toon).unwrap();
        let graph = parsed.get("@graph").expect("Should have @graph");
        assert!(graph.is_array());

        let graph_arr = graph.as_array().unwrap();
        assert_eq!(graph_arr.len(), 3, "Should have merged all 3 entities");

        assert!(graph_arr
            .iter()
            .any(|v| v.get("@id").and_then(|id| id.as_str()) == Some("ex:1")));
        assert!(graph_arr
            .iter()
            .any(|v| v.get("@id").and_then(|id| id.as_str()) == Some("ex:2")));
        assert!(graph_arr
            .iter()
            .any(|v| v.get("@id").and_then(|id| id.as_str()) == Some("ex:3")));

        let alice = graph_arr
            .iter()
            .find(|v| v.get("@id").and_then(|id| id.as_str()) == Some("ex:1"))
            .unwrap();
        assert_eq!(alice.get("age").and_then(|v| v.as_i64()), Some(30));
        assert!(alice.get("industry").is_none());

        let acme = graph_arr
            .iter()
            .find(|v| v.get("@id").and_then(|id| id.as_str()) == Some("ex:3"))
            .unwrap();
        assert_eq!(acme.get("industry").and_then(|v| v.as_str()), Some("Tech"));
        assert!(acme.get("age").is_none());
    }
}