1use once_cell::sync::Lazy;
7use regex::Regex;
8use serde_json::{Map, Value};
9use std::collections::HashSet;
10
11use crate::context::JsonLdContext;
12use crate::error::Result;
13use crate::keywords::*;
14
15static NEEDS_QUOTE_REGEX: Lazy<Regex> =
17 Lazy::new(|| Regex::new(r#"[,:|]|^\s|\s$"#).expect("NEEDS_QUOTE_REGEX is invalid"));
18
/// Number of spaces per nesting level used by `ToonSerializer::new`.
const DEFAULT_INDENT_SIZE: usize = 2;

/// Byte-length limit for the comma-joined rendering of a primitive array: a rendering
/// shorter than this is written on one line, otherwise one value per line.
const MAX_INLINE_ARRAY_LENGTH: usize = 60;

/// Fraction of missing cells above which a keyed array of objects is split into
/// per-shape tables (when shape partitioning is enabled).
const SPARSITY_THRESHOLD: f64 = 0.30;
28
/// Renders `serde_json` values as TOON text.
#[derive(Debug, Clone)]
pub struct ToonSerializer {
    /// Context used to compact IRIs in keys, table headers and string `@type` values.
    context: JsonLdContext,
    /// Number of spaces written per nesting level.
    indent_size: usize,
    /// Whether sparse keyed arrays of objects may be split into one table per shape.
    enable_shape_partitioning: bool,
}
62
63impl Default for ToonSerializer {
64 fn default() -> Self {
65 Self::new()
66 }
67}
68
69impl ToonSerializer {
70 pub fn new() -> Self {
80 Self {
81 context: JsonLdContext::new(),
82 indent_size: DEFAULT_INDENT_SIZE,
83 enable_shape_partitioning: true,
84 }
85 }
86
87 pub fn with_context(mut self, context: JsonLdContext) -> Self {
104 self.context = context;
105 self
106 }
107
108 pub fn with_indent_size(mut self, size: usize) -> Self {
122 self.indent_size = size;
123 self
124 }
125
126 pub fn context(&self) -> &JsonLdContext {
128 &self.context
129 }
130
131 pub fn indent_size(&self) -> usize {
133 self.indent_size
134 }
135
136 pub fn with_shape_partitioning(mut self, enable: bool) -> Self {
153 self.enable_shape_partitioning = enable;
154 self
155 }
156
157 pub fn serialize(&self, value: &Value) -> Result<String> {
178 let mut output = String::new();
179 self.serialize_value(value, 0, &mut output)?;
180 Ok(output)
181 }
182
183 pub fn serialize_json(&self, json: &str) -> Result<String> {
202 let value: Value = serde_json::from_str(json)?;
203 self.serialize(&value)
204 }
205
206 fn serialize_value(&self, value: &Value, depth: usize, output: &mut String) -> Result<()> {
208 match value {
209 Value::Null => output.push_str("null"),
210 Value::Bool(b) => output.push_str(if *b { "true" } else { "false" }),
211 Value::Number(n) => output.push_str(&n.to_string()),
212 Value::String(s) => output.push_str(&self.quote_if_needed(s)),
213 Value::Array(arr) => self.serialize_standalone_array(arr, depth, output)?,
214 Value::Object(obj) => self.serialize_object(obj, depth, output)?,
215 }
216 Ok(())
217 }
218
219 fn serialize_standalone_array(
221 &self,
222 arr: &[Value],
223 depth: usize,
224 output: &mut String,
225 ) -> Result<()> {
226 let indent = self.make_indent(depth);
227
228 if arr.is_empty() {
229 output.push_str("[]");
230 return Ok(());
231 }
232
233 if let Some(fields) = self.get_tabular_fields(arr) {
235 let compact_fields: Vec<String> =
237 fields.iter().map(|f| self.context.compact_uri(f)).collect();
238 output.push_str(&format!(
239 "[{}]{{{}}}:\n",
240 arr.len(),
241 compact_fields.join(",")
242 ));
243 let row_indent = self.make_indent(depth + 1);
244 for item in arr {
245 if let Value::Object(obj) = item {
246 let values: Vec<String> = fields
247 .iter()
248 .map(|field| {
249 obj.get(field)
250 .map(|v| self.value_to_csv_cell(v))
251 .unwrap_or_else(|| "null".to_string())
252 })
253 .collect();
254 output.push_str(&format!("{}{}\n", row_indent, values.join(", ")));
255 }
256 }
257 } else if self.is_primitive_array(arr) {
258 self.serialize_inline_primitive_array(arr, depth, output)?;
259 } else {
260 output.push_str(&format!("{}[{}]:\n", indent, arr.len()));
262 for item in arr {
263 let item_indent = self.make_indent(depth + 1);
264 output.push_str(&item_indent);
265 output.push_str("- ");
266 match item {
267 Value::Object(obj) => {
268 output.push('\n');
269 self.serialize_object(obj, depth + 2, output)?;
270 }
271 _ => {
272 self.serialize_value(item, depth + 1, output)?;
273 output.push('\n');
274 }
275 }
276 }
277 }
278 Ok(())
279 }
280
281 fn serialize_inline_primitive_array(
283 &self,
284 arr: &[Value],
285 depth: usize,
286 output: &mut String,
287 ) -> Result<()> {
288 let values: Vec<String> = arr.iter().map(|v| self.value_to_csv_cell(v)).collect();
289 let inline = values.join(", ");
290
291 if inline.len() < MAX_INLINE_ARRAY_LENGTH {
292 output.push_str(&format!("[{}]: {}", arr.len(), inline));
293 } else {
294 output.push_str(&format!("[{}]:\n", arr.len()));
295 let row_indent = self.make_indent(depth + 1);
296 for value in &values {
297 output.push_str(&format!("{}{}\n", row_indent, value));
298 }
299 }
300 Ok(())
301 }
302
303 fn serialize_object(
305 &self,
306 obj: &Map<String, Value>,
307 depth: usize,
308 output: &mut String,
309 ) -> Result<()> {
310 let indent = self.make_indent(depth);
311
312 let mut keys: Vec<&String> = obj.keys().collect();
314 keys.sort_by(|a, b| {
315 keyword_order(a)
316 .cmp(&keyword_order(b))
317 .then_with(|| a.cmp(b))
318 });
319
320 for key in keys {
321 let value = obj.get(key).expect("key exists in object we're iterating");
323 self.serialize_object_entry(key, value, depth, &indent, output)?;
324 }
325 Ok(())
326 }
327
328 fn serialize_object_entry(
330 &self,
331 key: &str,
332 value: &Value,
333 depth: usize,
334 indent: &str,
335 output: &mut String,
336 ) -> Result<()> {
337 let display_key = self.get_display_key(key);
338
339 match key {
340 JSONLD_GRAPH => {
342 if let Value::Array(arr) = value {
343 self.serialize_keyed_array(&display_key, arr, depth, output)?;
344 } else {
345 output.push_str(&format!("{}{}:\n", indent, display_key));
346 self.serialize_value(value, depth + 1, output)?;
347 }
348 }
349 JSONLD_CONTEXT => {
351 self.serialize_context(value, depth, output)?;
352 }
353 JSONLD_BASE | JSONLD_VOCAB => {
355 output.push_str(&format!("{}{}: ", indent, display_key));
356 self.serialize_value(value, depth, output)?;
357 output.push('\n');
358 }
359 JSONLD_ID => match value {
361 Value::Array(arr) => {
362 self.serialize_keyed_array(&display_key, arr, depth, output)?;
363 }
364 _ => {
365 output.push_str(&format!("{}{}: ", indent, display_key));
366 self.serialize_value(value, depth, output)?;
367 output.push('\n');
368 }
369 },
370 JSONLD_TYPE => match value {
372 Value::Array(arr) => {
373 self.serialize_keyed_array(&display_key, arr, depth, output)?;
374 }
375 Value::String(s) => {
376 let compact_type = self.context.compact_uri(s);
378 output.push_str(&format!("{}{}: {}\n", indent, display_key, compact_type));
379 }
380 _ => {
381 output.push_str(&format!("{}{}: ", indent, display_key));
382 self.serialize_value(value, depth, output)?;
383 output.push('\n');
384 }
385 },
386 JSONLD_REVERSE => {
388 output.push_str(&format!("{}{}:\n", indent, TOON_REVERSE));
389 if let Value::Object(rev_obj) = value {
390 self.serialize_object(rev_obj, depth + 1, output)?;
391 }
392 }
393 JSONLD_LIST => {
395 if let Value::Array(arr) = value {
396 self.serialize_keyed_array(TOON_LIST, arr, depth, output)?;
397 }
398 }
399 JSONLD_SET => {
401 if let Value::Array(arr) = value {
402 self.serialize_keyed_array(TOON_SET, arr, depth, output)?;
403 }
404 }
405 JSONLD_VALUE | JSONLD_LANGUAGE => {
408 output.push_str(&format!("{}{}: ", indent, display_key));
409 self.serialize_value(value, depth, output)?;
410 output.push('\n');
411 }
412 JSONLD_INCLUDED => {
414 if let Value::Array(arr) = value {
415 self.serialize_keyed_array(TOON_INCLUDED, arr, depth, output)?;
416 } else {
417 output.push_str(&format!("{}{}:\n", indent, TOON_INCLUDED));
418 self.serialize_value(value, depth + 1, output)?;
419 }
420 }
421 JSONLD_INDEX => {
423 output.push_str(&format!("{}{}: ", indent, TOON_INDEX));
424 self.serialize_value(value, depth, output)?;
425 output.push('\n');
426 }
427 JSONLD_NEST => {
429 output.push_str(&format!("{}{}:\n", indent, TOON_NEST));
430 if let Value::Object(nest_obj) = value {
431 self.serialize_object(nest_obj, depth + 1, output)?;
432 }
433 }
434 JSONLD_CONTAINER => match value {
436 Value::Array(arr) => {
437 self.serialize_keyed_array(TOON_CONTAINER, arr, depth, output)?;
438 }
439 _ => {
440 output.push_str(&format!("{}{}: ", indent, TOON_CONTAINER));
441 self.serialize_value(value, depth, output)?;
442 output.push('\n');
443 }
444 },
445 JSONLD_DIRECTION => {
447 output.push_str(&format!("{}{}: ", indent, TOON_DIRECTION));
448 self.serialize_value(value, depth, output)?;
449 output.push('\n');
450 }
451 JSONLD_IMPORT => {
453 output.push_str(&format!("{}{}: ", indent, TOON_IMPORT));
454 self.serialize_value(value, depth, output)?;
455 output.push('\n');
456 }
457 JSONLD_JSON => {
459 output.push_str(&format!("{}{}: ", indent, TOON_JSON));
460 let json_str = serde_json::to_string(value).unwrap_or_else(|_| "null".to_string());
462 output.push_str(&format!("\"{}\"", json_str.replace('"', "\\\"")));
463 output.push('\n');
464 }
465 JSONLD_NONE => {
467 output.push_str(&format!("{}{}: ", indent, TOON_NONE));
468 self.serialize_value(value, depth, output)?;
469 output.push('\n');
470 }
471 JSONLD_PREFIX => {
473 output.push_str(&format!("{}{}: ", indent, TOON_PREFIX));
474 self.serialize_value(value, depth, output)?;
475 output.push('\n');
476 }
477 JSONLD_PROPAGATE => {
479 output.push_str(&format!("{}{}: ", indent, TOON_PROPAGATE));
480 self.serialize_value(value, depth, output)?;
481 output.push('\n');
482 }
483 JSONLD_PROTECTED => {
485 output.push_str(&format!("{}{}: ", indent, TOON_PROTECTED));
486 self.serialize_value(value, depth, output)?;
487 output.push('\n');
488 }
489 JSONLD_VERSION => {
491 output.push_str(&format!("{}{}: ", indent, TOON_VERSION));
492 self.serialize_value(value, depth, output)?;
493 output.push('\n');
494 }
495 _ => {
497 let compact_key = self.context.compact_uri(key);
498 match value {
499 Value::Array(arr) => {
500 self.serialize_keyed_array(&compact_key, arr, depth, output)?;
501 }
502 Value::Object(nested) => {
503 output.push_str(&format!("{}{}:\n", indent, compact_key));
504 self.serialize_object(nested, depth + 1, output)?;
505 }
506 _ => {
507 output.push_str(&format!("{}{}: ", indent, compact_key));
508 self.serialize_value(value, depth, output)?;
509 output.push('\n');
510 }
511 }
512 }
513 }
514 Ok(())
515 }
516
517 fn get_display_key(&self, key: &str) -> String {
519 if let Some(toon_key) = get_toon_keyword(key) {
522 toon_key.to_string()
523 } else {
524 self.context.compact_uri(key)
525 }
526 }
527
528 fn serialize_context(&self, value: &Value, depth: usize, output: &mut String) -> Result<()> {
530 let indent = self.make_indent(depth);
531 output.push_str(&format!("{}{}:\n", indent, JSONLD_CONTEXT));
532
533 match value {
534 Value::Object(ctx) => {
535 let ctx_indent = self.make_indent(depth + 1);
536 for (prefix, uri) in ctx {
537 output.push_str(&format!("{}{}: ", ctx_indent, prefix));
538 self.serialize_value(uri, depth + 1, output)?;
539 output.push('\n');
540 }
541 }
542 Value::Array(arr) => {
543 for item in arr {
545 self.serialize_context(item, depth + 1, output)?;
546 }
547 }
548 Value::String(s) => {
549 let ctx_indent = self.make_indent(depth + 1);
550 output.push_str(&format!("{}{}\n", ctx_indent, self.quote_if_needed(s)));
551 }
552 _ => {
553 self.serialize_value(value, depth + 1, output)?;
554 output.push('\n');
555 }
556 }
557 Ok(())
558 }
559
560 pub fn serialize_keyed_array(
562 &self,
563 key: &str,
564 arr: &[Value],
565 depth: usize,
566 output: &mut String,
567 ) -> Result<()> {
568 let indent = self.make_indent(depth);
569
570 if arr.is_empty() {
571 output.push_str(&format!("{}{}[0]:\n", indent, key));
572 return Ok(());
573 }
574
575 if let Some(fields) = self.get_tabular_fields(arr) {
577 if self.enable_shape_partitioning {
579 let sparsity = self.calculate_sparsity(arr, &fields);
580
581 if sparsity > SPARSITY_THRESHOLD {
583 return self.serialize_partitioned_array(key, arr, depth, output);
584 }
585 }
586
587 self.serialize_tabular_array(key, arr, &fields, depth, output)?;
589 } else if self.is_primitive_array(arr) {
590 self.serialize_primitive_array(key, arr, depth, output)?;
591 } else {
592 output.push_str(&format!("{}{}[{}]:\n", indent, key, arr.len()));
594 for item in arr {
595 let item_indent = self.make_indent(depth + 1);
596 output.push_str(&item_indent);
597 output.push_str("- ");
598 match item {
599 Value::Object(obj) => {
600 output.push('\n');
601 self.serialize_object(obj, depth + 2, output)?;
602 }
603 _ => {
604 self.serialize_value(item, depth + 1, output)?;
605 output.push('\n');
606 }
607 }
608 }
609 }
610 Ok(())
611 }
612
613 fn get_tabular_fields(&self, arr: &[Value]) -> Option<Vec<String>> {
618 if arr.is_empty() {
619 return None;
620 }
621
622 let mut all_keys: HashSet<String> = HashSet::new();
624
625 for item in arr {
626 match item {
627 Value::Object(obj) => {
628 for key in obj.keys() {
629 all_keys.insert(key.clone());
630 }
631 }
632 _ => return None,
634 }
635 }
636
637 if all_keys.is_empty() {
638 return None;
639 }
640
641 let mut fields: Vec<String> = all_keys.into_iter().collect();
643 fields.sort_by(|a, b| {
644 keyword_order(a)
645 .cmp(&keyword_order(b))
646 .then_with(|| a.cmp(b))
647 });
648 Some(fields)
649 }
650
651 fn is_primitive_array(&self, arr: &[Value]) -> bool {
653 arr.iter().all(|v| {
654 matches!(
655 v,
656 Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_)
657 )
658 })
659 }
660
661 fn serialize_tabular_array(
663 &self,
664 key: &str,
665 arr: &[Value],
666 fields: &[String],
667 depth: usize,
668 output: &mut String,
669 ) -> Result<()> {
670 let indent = self.make_indent(depth);
671 let row_indent = self.make_indent(depth + 1);
672
673 let compact_fields: Vec<String> =
675 fields.iter().map(|f| self.context.compact_uri(f)).collect();
676
677 output.push_str(&format!(
679 "{}{}[{}]{{{}}}:\n",
680 indent,
681 key,
682 arr.len(),
683 compact_fields.join(",")
684 ));
685
686 for item in arr {
688 if let Value::Object(obj) = item {
689 let values: Vec<String> = fields
690 .iter()
691 .map(|field| {
692 obj.get(field)
693 .map(|v| self.value_to_csv_cell(v))
694 .unwrap_or_else(|| "null".to_string())
695 })
696 .collect();
697 output.push_str(&format!("{}{}\n", row_indent, values.join(", ")));
698 }
699 }
700
701 Ok(())
702 }
703
704 fn serialize_primitive_array(
706 &self,
707 key: &str,
708 arr: &[Value],
709 depth: usize,
710 output: &mut String,
711 ) -> Result<()> {
712 let indent = self.make_indent(depth);
713
714 let values: Vec<String> = arr.iter().map(|v| self.value_to_csv_cell(v)).collect();
715 let inline = values.join(", ");
716
717 if inline.len() < MAX_INLINE_ARRAY_LENGTH {
719 output.push_str(&format!("{}{}[{}]: {}\n", indent, key, arr.len(), inline));
720 } else {
721 output.push_str(&format!("{}{}[{}]:\n", indent, key, arr.len()));
723 let row_indent = self.make_indent(depth + 1);
724 for value in &values {
725 output.push_str(&format!("{}{}\n", row_indent, value));
726 }
727 }
728
729 Ok(())
730 }
731
732 fn value_to_csv_cell(&self, value: &Value) -> String {
734 match value {
735 Value::Null => "null".to_string(),
736 Value::Bool(b) => if *b { "true" } else { "false" }.to_string(),
737 Value::Number(n) => n.to_string(),
738 Value::String(s) => self.quote_if_needed(s),
739 Value::Array(_) | Value::Object(_) => {
740 let json = serde_json::to_string(value).unwrap_or_else(|_| "null".to_string());
742 format!("\"{}\"", json.replace('"', "\\\""))
743 }
744 }
745 }
746
747 fn quote_if_needed(&self, s: &str) -> String {
749 if s.is_empty() {
750 return "\"\"".to_string();
751 }
752 if NEEDS_QUOTE_REGEX.is_match(s) {
753 format!("\"{}\"", s.replace('"', "\\\""))
754 } else {
755 s.to_string()
756 }
757 }
758
759 #[inline]
761 fn make_indent(&self, depth: usize) -> String {
762 " ".repeat(depth * self.indent_size)
763 }
764
765 fn calculate_sparsity(&self, arr: &[Value], fields: &[String]) -> f64 {
768 if arr.is_empty() || fields.is_empty() {
769 return 0.0;
770 }
771
772 let mut null_count = 0;
773 let total_cells = arr.len() * fields.len();
774
775 for item in arr {
776 if let Value::Object(obj) = item {
777 for field in fields {
778 if !obj.contains_key(field) {
779 null_count += 1;
780 }
781 }
782 }
783 }
784
785 null_count as f64 / total_cells as f64
786 }
787
788 fn entity_signature(&self, obj: &Map<String, Value>) -> String {
791 let mut keys: Vec<&String> = obj.keys().collect();
792 keys.sort();
793 keys.into_iter()
794 .map(|k| k.as_str())
795 .collect::<Vec<&str>>()
796 .join("|")
797 }
798
799 fn partition_by_shape<'a>(
802 &self,
803 arr: &'a [Value],
804 ) -> Vec<(String, Vec<String>, Vec<&'a Value>)> {
805 use std::collections::HashMap;
806
807 let mut shape_map: HashMap<String, Vec<&Value>> = HashMap::new();
808
809 for item in arr {
811 if let Value::Object(obj) = item {
812 let sig = self.entity_signature(obj);
813 shape_map.entry(sig).or_default().push(item);
814 }
815 }
816
817 let mut partitions: Vec<(String, Vec<String>, Vec<&Value>)> = shape_map
819 .into_iter()
820 .map(|(sig, entities)| {
821 let fields: Vec<String> = sig.split('|').map(String::from).collect();
822 (sig, fields, entities)
823 })
824 .collect();
825
826 partitions.sort_by(|a, b| b.2.len().cmp(&a.2.len()));
828
829 partitions
830 }
831
832 fn serialize_partitioned_array(
835 &self,
836 key: &str,
837 arr: &[Value],
838 depth: usize,
839 output: &mut String,
840 ) -> Result<()> {
841 let partitions = self.partition_by_shape(arr);
842 let indent = self.make_indent(depth);
843 let row_indent = self.make_indent(depth + 1);
844
845 for (idx, (_sig, fields, entities)) in partitions.iter().enumerate() {
846 if idx > 0 {
848 output.push('\n');
849 }
850
851 let compact_fields: Vec<String> =
853 fields.iter().map(|f| self.context.compact_uri(f)).collect();
854
855 output.push_str(&format!(
857 "{}{}[{}]{{{}}}:\n",
858 indent,
859 key,
860 entities.len(),
861 compact_fields.join(",")
862 ));
863
864 for entity in entities {
866 if let Value::Object(obj) = entity {
867 let values: Vec<String> = fields
868 .iter()
869 .map(|field| {
870 obj.get(field)
871 .map(|v| self.value_to_csv_cell(v))
872 .unwrap_or_else(|| "null".to_string())
873 })
874 .collect();
875 output.push_str(&format!("{}{}\n", row_indent, values.join(", ")));
876 }
877 }
878 }
879
880 Ok(())
881 }
882}
883
884#[cfg(test)]
885mod tests {
886 use super::*;
887 use serde_json::json;
888
/// A freshly built serializer has the default indent width and an empty context.
#[test]
fn test_new_serializer() {
    let fresh = ToonSerializer::new();
    assert!(fresh.context().is_empty());
    assert_eq!(fresh.indent_size(), DEFAULT_INDENT_SIZE);
}
895
/// `with_indent_size` overrides the indent width.
#[test]
fn test_with_indent_size() {
    let wide = ToonSerializer::new().with_indent_size(4);
    assert_eq!(wide.indent_size(), 4);
}
901
/// `with_context` installs the supplied context.
#[test]
fn test_with_context() {
    let mut foaf_ctx = JsonLdContext::new();
    foaf_ctx.add_prefix("foaf", "http://xmlns.com/foaf/0.1/");

    let configured = ToonSerializer::new().with_context(foaf_ctx);
    assert!(configured.context().has_prefixes());
}
910
/// Scalar members are written as `key: value` lines.
#[test]
fn test_serialize_primitives() {
    let document = json!({
        "name": "Alice",
        "age": 30,
        "active": true,
        "score": null
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    let expected_lines = ["name: Alice", "age: 30", "active: true", "score: null"];
    for line in expected_lines.iter() {
        assert!(rendered.contains(*line));
    }
}
928
/// An array of strings is written under a `key[N]:` header.
#[test]
fn test_serialize_primitive_array() {
    let document = json!({
        "tags": ["rust", "wasm", "python"]
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    assert!(rendered.contains("tags[3]:"));
    assert!(rendered.contains("rust"));
}
941
/// An array of same-shaped objects is written as a table.
#[test]
fn test_serialize_tabular_array() {
    let document = json!({
        "people": [
            {"name": "Alice", "age": 30},
            {"name": "Bob", "age": 25}
        ]
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    for fragment in ["people[2]{", "Alice", "Bob"].iter() {
        assert!(rendered.contains(*fragment));
    }
}
958
/// An empty array keeps its key and reports a length of zero.
#[test]
fn test_serialize_empty_array() {
    let document = json!({
        "items": []
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();
    assert!(rendered.contains("items[0]:"));
}
970
/// Nested objects produce a header line per level and inline leaf values.
#[test]
fn test_serialize_nested_object() {
    let document = json!({
        "person": {
            "name": "Alice",
            "address": {
                "city": "Seattle"
            }
        }
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    for fragment in ["person:", "address:", "city: Seattle"].iter() {
        assert!(rendered.contains(*fragment));
    }
}
989
/// Strings are quoted only when they contain a delimiter, are empty, or have
/// leading/trailing whitespace.
#[test]
fn test_quote_if_needed() {
    let serializer = ToonSerializer::new();

    let cases = [
        ("hello", "hello"),
        ("hello, world", "\"hello, world\""),
        ("key: value", "\"key: value\""),
        ("a|b", "\"a|b\""),
        ("", "\"\""),
        (" leading", "\" leading\""),
        ("trailing ", "\"trailing \""),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(serializer.quote_if_needed(input), expected);
    }
}
1005
/// Keys that are full IRIs are compacted with the configured prefixes.
#[test]
fn test_serialize_with_context_compaction() {
    let mut foaf_ctx = JsonLdContext::new();
    foaf_ctx.add_prefix("foaf", "http://xmlns.com/foaf/0.1/");

    let document = json!({
        "http://xmlns.com/foaf/0.1/name": "Alice"
    });

    let rendered = ToonSerializer::new()
        .with_context(foaf_ctx)
        .serialize(&document)
        .unwrap();
    assert!(rendered.contains("foaf:name"));
}
1020
/// A value node keeps both its value and its language tag.
#[test]
fn test_serialize_value_node_with_language() {
    let document = json!({
        "title": {
            "@value": "Bonjour",
            "@language": "fr"
        }
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    for fragment in ["@value", "Bonjour", "@language", "fr"].iter() {
        assert!(rendered.contains(*fragment));
    }
}
1039
/// A typed value node keeps its value and has its datatype IRI compacted.
#[test]
fn test_serialize_value_node_with_type() {
    let mut xsd_ctx = JsonLdContext::new();
    xsd_ctx.add_prefix("xsd", "http://www.w3.org/2001/XMLSchema#");

    let document = json!({
        "date": {
            "@value": "2024-01-15",
            "@type": "http://www.w3.org/2001/XMLSchema#date"
        }
    });

    let rendered = ToonSerializer::new()
        .with_context(xsd_ctx)
        .serialize(&document)
        .unwrap();

    for fragment in ["@value", "2024-01-15", "@type", "xsd:date"].iter() {
        assert!(rendered.contains(*fragment));
    }
}
1061
/// An inline `@context` object is written with one line per prefix.
#[test]
fn test_serialize_context() {
    let document = json!({
        "@context": {
            "foaf": "http://xmlns.com/foaf/0.1/",
            "schema": "http://schema.org/"
        },
        "name": "Test"
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    for fragment in ["@context:", "foaf:", "schema:"].iter() {
        assert!(rendered.contains(*fragment));
    }
}
1079
/// A `@graph` array of same-shaped nodes is written as one keyed block.
#[test]
fn test_serialize_graph() {
    let document = json!({
        "@graph": [
            {"@id": "ex:1", "name": "Alice"},
            {"@id": "ex:2", "name": "Bob"}
        ]
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();
    assert!(rendered.contains("@graph[2]"));
}
1094
/// `serialize_json` accepts raw JSON text.
#[test]
fn test_serialize_json_string() {
    let raw = r#"{"name": "Alice", "age": 30}"#;

    let rendered = ToonSerializer::new().serialize_json(raw).unwrap();

    assert!(rendered.contains("name: Alice"));
    assert!(rendered.contains("age: 30"));
}
1105
/// Without partitioning, a table uses the union of all keys and fills gaps with `null`.
#[test]
fn test_tabular_array_union_of_keys() {
    let unpartitioned = ToonSerializer::new().with_shape_partitioning(false);

    let document = json!({
        "items": [
            {"a": 1, "b": 2},
            {"a": 3, "c": 4}
        ]
    });

    let rendered = unpartitioned.serialize(&document).unwrap();

    for fragment in ["items[2]{a,b,c}:", "1, 2, null", "3, null, 4"].iter() {
        assert!(rendered.contains(*fragment));
    }
}
1125
/// With partitioning disabled, even very sparse data stays in a single table.
#[test]
fn test_shape_partitioning_disabled() {
    let unpartitioned = ToonSerializer::new().with_shape_partitioning(false);

    let document = json!({
        "items": [
            {"a": 1, "b": 2},
            {"a": 3, "c": 4},
            {"x": 5, "y": 6}
        ]
    });

    let rendered = unpartitioned.serialize(&document).unwrap();
    assert!(rendered.contains("items[3]{a,b,c,x,y}:"));
}
1143
/// Uniform objects are never partitioned, even with partitioning enabled.
#[test]
fn test_shape_partitioning_low_sparsity() {
    let document = json!({
        "items": [
            {"a": 1, "b": 2},
            {"a": 3, "b": 4},
            {"a": 5, "b": 6}
        ]
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    assert!(rendered.contains("items[3]{a,b}:"));
    assert!(!rendered.contains("items[1]"));
}
1162
/// Objects with entirely different shapes are split into one block per shape.
#[test]
fn test_shape_partitioning_high_sparsity() {
    let document = json!({
        "people": [
            {"@id": "ex:1", "name": "Alice", "age": 30, "email": "alice@example.com"},
            {"@id": "ex:2", "name": "Bob", "phone": "+1234567890", "address": "123 Main St"},
            {"@id": "ex:3", "name": "Carol", "company": "ACME", "role": "Engineer", "salary": 100000}
        ]
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    assert!(
        rendered.contains("people[1]"),
        "Should have partitioned blocks with [1]"
    );

    let block_count = rendered.matches("people[").count();
    assert_eq!(
        block_count, 3,
        "Should have 3 separate blocks (one per entity) due to completely different shapes"
    );

    assert!(
        !rendered.contains("people[3]"),
        "Should not have a single block with all 3 entities"
    );
}
1198
/// A graph mixing two node shapes is written as two blocks of two nodes each.
#[test]
fn test_shape_partitioning_heterogeneous_graph() {
    let document = json!({
        "@graph": [
            {"@id": "ex:person1", "@type": "Person", "name": "Alice", "age": 30, "email": "alice@example.com"},
            {"@id": "ex:person2", "@type": "Person", "name": "Bob", "age": 25, "email": "bob@example.com"},
            {"@id": "ex:org1", "@type": "Organization", "name": "ACME", "industry": "Tech", "founded": 2000, "employees": 500, "revenue": 10000000},
            {"@id": "ex:org2", "@type": "Organization", "name": "XYZ", "industry": "Finance", "founded": 1995, "employees": 300, "revenue": 5000000}
        ]
    });

    let rendered = ToonSerializer::new().serialize(&document).unwrap();

    assert!(rendered.contains("@graph[2]"));
    let block_count = rendered.matches("@graph[").count();
    assert_eq!(block_count, 2, "Should have 2 @graph blocks");
}
1219
/// Sparsity is the share of missing cells: high for disjoint shapes, zero for uniform ones.
#[test]
fn test_calculate_sparsity() {
    let serializer = ToonSerializer::new();

    let disjoint_rows = vec![json!({"a": 1}), json!({"b": 2}), json!({"c": 3})];
    let disjoint_fields = vec!["a".to_string(), "b".to_string(), "c".to_string()];
    let high = serializer.calculate_sparsity(&disjoint_rows, &disjoint_fields);
    assert!(high > 0.6, "Should have high sparsity (~66%)");

    let uniform_rows = vec![json!({"a": 1, "b": 2}), json!({"a": 3, "b": 4})];
    let uniform_fields = vec!["a".to_string(), "b".to_string()];
    let low = serializer.calculate_sparsity(&uniform_rows, &uniform_fields);
    assert_eq!(low, 0.0, "Should have zero sparsity");
}
1236
/// Signatures depend only on the set of keys, not on key order or values.
#[test]
fn test_entity_signature() {
    let serializer = ToonSerializer::new();
    let parse = |raw: &str| serde_json::from_str::<Map<String, Value>>(raw).unwrap();

    let alice = parse(r#"{"name": "Alice", "age": 30}"#);
    let bob = parse(r#"{"age": 30, "name": "Bob"}"#);
    let carol = parse(r#"{"name": "Carol", "email": "c@example.com"}"#);

    let alice_sig = serializer.entity_signature(&alice);
    let bob_sig = serializer.entity_signature(&bob);
    let carol_sig = serializer.entity_signature(&carol);

    assert_eq!(alice_sig, bob_sig);
    assert_eq!(alice_sig, "age|name");

    assert_ne!(alice_sig, carol_sig);
    assert_eq!(carol_sig, "email|name");
}
1262
/// Partitions are grouped by shape and ordered from largest to smallest.
#[test]
fn test_partition_by_shape() {
    let serializer = ToonSerializer::new();

    let rows = vec![
        json!({"a": 1, "b": 2}),
        json!({"a": 3, "b": 4}),
        json!({"x": 5, "y": 6}),
        json!({"x": 7, "y": 8}),
        json!({"x": 9, "y": 10}),
    ];

    let groups = serializer.partition_by_shape(&rows);

    assert_eq!(groups.len(), 2);

    let sizes: Vec<usize> = groups.iter().map(|group| group.2.len()).collect();
    assert_eq!(sizes[0], 3);
    assert_eq!(sizes[1], 2);
}
1284
/// A partitioned graph parses back into a single array holding every node.
#[test]
fn test_shape_partitioning_roundtrip() {
    use crate::ToonParser;

    let original = json!({
        "@graph": [
            {"@id": "ex:1", "@type": "Person", "name": "Alice", "age": 30},
            {"@id": "ex:2", "@type": "Person", "name": "Bob", "age": 25},
            {"@id": "ex:3", "@type": "Org", "name": "ACME", "industry": "Tech"}
        ]
    });

    let rendered = ToonSerializer::new().serialize(&original).unwrap();
    let reparsed = ToonParser::new().parse(&rendered).unwrap();

    let graph = reparsed.get("@graph").expect("Should have @graph");
    assert!(graph.is_array());
    let nodes = graph.as_array().unwrap();
    assert_eq!(
        nodes.len(),
        3,
        "Should have all 3 entities after parsing"
    );

    for expected_id in ["ex:1", "ex:2", "ex:3"].iter() {
        assert!(nodes
            .iter()
            .any(|node| node.get("@id").and_then(|id| id.as_str()) == Some(*expected_id)));
    }
}
1327}