toon_core/
serializer.rs

1//! TOON-LD Serializer
2//!
3//! This module provides the `ToonSerializer` struct for converting JSON/JSON-LD
4//! values to TOON-LD format.
5
6use once_cell::sync::Lazy;
7use regex::Regex;
8use serde_json::{Map, Value};
9use std::collections::HashSet;
10
11use crate::context::JsonLdContext;
12use crate::error::Result;
13use crate::keywords::*;
14
15/// Regex for detecting values that need quoting
16static NEEDS_QUOTE_REGEX: Lazy<Regex> =
17    Lazy::new(|| Regex::new(r#"[,:|]|^\s|\s$"#).expect("NEEDS_QUOTE_REGEX is invalid"));
18
19/// Default indentation size (2 spaces)
20const DEFAULT_INDENT_SIZE: usize = 2;
21
22/// Maximum inline array length before switching to multi-line format
23const MAX_INLINE_ARRAY_LENGTH: usize = 60;
24
25/// Sparsity threshold for enabling shape-based partitioning (30%)
26/// If null cells exceed this percentage, arrays will be partitioned by shape
27const SPARSITY_THRESHOLD: f64 = 0.30;
28
29/// TOON-LD Serializer
30///
31/// Converts JSON/JSON-LD values to TOON-LD format. The serializer handles:
32/// - Tabular arrays (arrays of objects with union-of-keys)
33/// - Primitive arrays (inline or multi-line)
34/// - JSON-LD keywords and context-based URI compaction
35/// - Value nodes with language tags and datatypes (using standard TOON object syntax)
36///
37/// # Example
38///
39/// ```
40/// use toon_core::{ToonSerializer, JsonLdContext};
41/// use serde_json::json;
42///
43/// let serializer = ToonSerializer::new();
44/// let value = json!({
45///     "name": "Alice",
46///     "age": 30
47/// });
48///
49/// let toon = serializer.serialize(&value).unwrap();
50/// assert!(toon.contains("name: Alice"));
51/// assert!(toon.contains("age: 30"));
52/// ```
53#[derive(Debug, Clone)]
54pub struct ToonSerializer {
55    /// JSON-LD context for URI compaction
56    context: JsonLdContext,
57    /// Number of spaces per indentation level
58    indent_size: usize,
59    /// Enable shape-based partitioning for sparse arrays
60    enable_shape_partitioning: bool,
61}
62
63impl Default for ToonSerializer {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl ToonSerializer {
70    /// Create a new serializer with default settings.
71    ///
72    /// # Example
73    ///
74    /// ```
75    /// use toon_core::ToonSerializer;
76    ///
77    /// let serializer = ToonSerializer::new();
78    /// ```
79    pub fn new() -> Self {
80        Self {
81            context: JsonLdContext::new(),
82            indent_size: DEFAULT_INDENT_SIZE,
83            enable_shape_partitioning: true,
84        }
85    }
86
87    /// Set the JSON-LD context for URI compaction.
88    ///
89    /// # Arguments
90    ///
91    /// * `context` - The JSON-LD context to use
92    ///
93    /// # Example
94    ///
95    /// ```
96    /// use toon_core::{ToonSerializer, JsonLdContext};
97    ///
98    /// let mut ctx = JsonLdContext::new();
99    /// ctx.add_prefix("foaf", "http://xmlns.com/foaf/0.1/");
100    ///
101    /// let serializer = ToonSerializer::new().with_context(ctx);
102    /// ```
103    pub fn with_context(mut self, context: JsonLdContext) -> Self {
104        self.context = context;
105        self
106    }
107
108    /// Set the indentation size.
109    ///
110    /// # Arguments
111    ///
112    /// * `size` - Number of spaces per indentation level
113    ///
114    /// # Example
115    ///
116    /// ```
117    /// use toon_core::ToonSerializer;
118    ///
119    /// let serializer = ToonSerializer::new().with_indent_size(4);
120    /// ```
121    pub fn with_indent_size(mut self, size: usize) -> Self {
122        self.indent_size = size;
123        self
124    }
125
126    /// Get a reference to the current context.
127    pub fn context(&self) -> &JsonLdContext {
128        &self.context
129    }
130
131    /// Get the current indentation size.
132    pub fn indent_size(&self) -> usize {
133        self.indent_size
134    }
135
136    /// Enable or disable shape-based partitioning for sparse arrays.
137    ///
138    /// When enabled, arrays with high sparsity (> 30% null values) will be
139    /// automatically partitioned by entity shape to reduce null delimiter overhead.
140    ///
141    /// # Arguments
142    ///
143    /// * `enable` - Whether to enable shape-based partitioning
144    ///
145    /// # Example
146    ///
147    /// ```
148    /// use toon_core::ToonSerializer;
149    ///
150    /// let serializer = ToonSerializer::new().with_shape_partitioning(true);
151    /// ```
152    pub fn with_shape_partitioning(mut self, enable: bool) -> Self {
153        self.enable_shape_partitioning = enable;
154        self
155    }
156
157    /// Serialize a JSON value to TOON-LD format.
158    ///
159    /// # Arguments
160    ///
161    /// * `value` - The JSON value to serialize
162    ///
163    /// # Returns
164    ///
165    /// A `Result` containing the TOON-LD string or an error.
166    ///
167    /// # Example
168    ///
169    /// ```
170    /// use toon_core::ToonSerializer;
171    /// use serde_json::json;
172    ///
173    /// let serializer = ToonSerializer::new();
174    /// let value = json!({"name": "Alice", "age": 30});
175    /// let toon = serializer.serialize(&value).unwrap();
176    /// ```
177    pub fn serialize(&self, value: &Value) -> Result<String> {
178        let mut output = String::new();
179        self.serialize_value(value, 0, &mut output)?;
180        Ok(output)
181    }
182
183    /// Serialize a JSON string to TOON-LD string.
184    ///
185    /// # Arguments
186    ///
187    /// * `json` - A JSON string to parse and serialize
188    ///
189    /// # Returns
190    ///
191    /// A `Result` containing the TOON-LD string or an error.
192    ///
193    /// # Example
194    ///
195    /// ```
196    /// use toon_core::ToonSerializer;
197    ///
198    /// let serializer = ToonSerializer::new();
199    /// let toon = serializer.serialize_json(r#"{"name": "Alice"}"#).unwrap();
200    /// ```
201    pub fn serialize_json(&self, json: &str) -> Result<String> {
202        let value: Value = serde_json::from_str(json)?;
203        self.serialize(&value)
204    }
205
206    /// Serialize a JSON value at a given depth.
207    fn serialize_value(&self, value: &Value, depth: usize, output: &mut String) -> Result<()> {
208        match value {
209            Value::Null => output.push_str("null"),
210            Value::Bool(b) => output.push_str(if *b { "true" } else { "false" }),
211            Value::Number(n) => output.push_str(&n.to_string()),
212            Value::String(s) => output.push_str(&self.quote_if_needed(s)),
213            Value::Array(arr) => self.serialize_standalone_array(arr, depth, output)?,
214            Value::Object(obj) => self.serialize_object(obj, depth, output)?,
215        }
216        Ok(())
217    }
218
219    /// Serialize a standalone array (without a key, e.g., top-level array).
220    fn serialize_standalone_array(
221        &self,
222        arr: &[Value],
223        depth: usize,
224        output: &mut String,
225    ) -> Result<()> {
226        let indent = self.make_indent(depth);
227
228        if arr.is_empty() {
229            output.push_str("[]");
230            return Ok(());
231        }
232
233        // Check if this is an array of objects (can use tabular format)
234        if let Some(fields) = self.get_tabular_fields(arr) {
235            // Use anonymous tabular format
236            let compact_fields: Vec<String> =
237                fields.iter().map(|f| self.context.compact_uri(f)).collect();
238            output.push_str(&format!(
239                "[{}]{{{}}}:\n",
240                arr.len(),
241                compact_fields.join(",")
242            ));
243            let row_indent = self.make_indent(depth + 1);
244            for item in arr {
245                if let Value::Object(obj) = item {
246                    let values: Vec<String> = fields
247                        .iter()
248                        .map(|field| {
249                            obj.get(field)
250                                .map(|v| self.value_to_csv_cell(v))
251                                .unwrap_or_else(|| "null".to_string())
252                        })
253                        .collect();
254                    output.push_str(&format!("{}{}\n", row_indent, values.join(", ")));
255                }
256            }
257        } else if self.is_primitive_array(arr) {
258            self.serialize_inline_primitive_array(arr, depth, output)?;
259        } else {
260            // Mixed array
261            output.push_str(&format!("{}[{}]:\n", indent, arr.len()));
262            for item in arr {
263                let item_indent = self.make_indent(depth + 1);
264                output.push_str(&item_indent);
265                output.push_str("- ");
266                match item {
267                    Value::Object(obj) => {
268                        output.push('\n');
269                        self.serialize_object(obj, depth + 2, output)?;
270                    }
271                    _ => {
272                        self.serialize_value(item, depth + 1, output)?;
273                        output.push('\n');
274                    }
275                }
276            }
277        }
278        Ok(())
279    }
280
281    /// Serialize an inline primitive array.
282    fn serialize_inline_primitive_array(
283        &self,
284        arr: &[Value],
285        depth: usize,
286        output: &mut String,
287    ) -> Result<()> {
288        let values: Vec<String> = arr.iter().map(|v| self.value_to_csv_cell(v)).collect();
289        let inline = values.join(", ");
290
291        if inline.len() < MAX_INLINE_ARRAY_LENGTH {
292            output.push_str(&format!("[{}]: {}", arr.len(), inline));
293        } else {
294            output.push_str(&format!("[{}]:\n", arr.len()));
295            let row_indent = self.make_indent(depth + 1);
296            for value in &values {
297                output.push_str(&format!("{}{}\n", row_indent, value));
298            }
299        }
300        Ok(())
301    }
302
303    /// Serialize a JSON object.
304    fn serialize_object(
305        &self,
306        obj: &Map<String, Value>,
307        depth: usize,
308        output: &mut String,
309    ) -> Result<()> {
310        let indent = self.make_indent(depth);
311
312        // Sort keys by keyword order, then alphabetically
313        let mut keys: Vec<&String> = obj.keys().collect();
314        keys.sort_by(|a, b| {
315            keyword_order(a)
316                .cmp(&keyword_order(b))
317                .then_with(|| a.cmp(b))
318        });
319
320        for key in keys {
321            // Safe: we're iterating over keys that exist
322            let value = obj.get(key).expect("key exists in object we're iterating");
323            self.serialize_object_entry(key, value, depth, &indent, output)?;
324        }
325        Ok(())
326    }
327
328    /// Serialize a single object entry (key-value pair).
329    fn serialize_object_entry(
330        &self,
331        key: &str,
332        value: &Value,
333        depth: usize,
334        indent: &str,
335        output: &mut String,
336    ) -> Result<()> {
337        let display_key = self.get_display_key(key);
338
339        match key {
340            // Special handling for @graph - always use tabular if possible
341            JSONLD_GRAPH => {
342                if let Value::Array(arr) = value {
343                    self.serialize_keyed_array(&display_key, arr, depth, output)?;
344                } else {
345                    output.push_str(&format!("{}{}:\n", indent, display_key));
346                    self.serialize_value(value, depth + 1, output)?;
347                }
348            }
349            // @context gets special nested formatting
350            JSONLD_CONTEXT => {
351                self.serialize_context(value, depth, output)?;
352            }
353            // @base and @vocab are simple string values
354            JSONLD_BASE | JSONLD_VOCAB => {
355                output.push_str(&format!("{}{}: ", indent, display_key));
356                self.serialize_value(value, depth, output)?;
357                output.push('\n');
358            }
359            // @id uses simple serialization
360            JSONLD_ID => match value {
361                Value::Array(arr) => {
362                    self.serialize_keyed_array(&display_key, arr, depth, output)?;
363                }
364                _ => {
365                    output.push_str(&format!("{}{}: ", indent, display_key));
366                    self.serialize_value(value, depth, output)?;
367                    output.push('\n');
368                }
369            },
370            // @type values should be compacted with context
371            JSONLD_TYPE => match value {
372                Value::Array(arr) => {
373                    self.serialize_keyed_array(&display_key, arr, depth, output)?;
374                }
375                Value::String(s) => {
376                    // Compact the type URI using context
377                    let compact_type = self.context.compact_uri(s);
378                    output.push_str(&format!("{}{}: {}\n", indent, display_key, compact_type));
379                }
380                _ => {
381                    output.push_str(&format!("{}{}: ", indent, display_key));
382                    self.serialize_value(value, depth, output)?;
383                    output.push('\n');
384                }
385            },
386            // @reverse contains nested object with reverse properties
387            JSONLD_REVERSE => {
388                output.push_str(&format!("{}{}:\n", indent, TOON_REVERSE));
389                if let Value::Object(rev_obj) = value {
390                    self.serialize_object(rev_obj, depth + 1, output)?;
391                }
392            }
393            // @list is an ordered array
394            JSONLD_LIST => {
395                if let Value::Array(arr) = value {
396                    self.serialize_keyed_array(TOON_LIST, arr, depth, output)?;
397                }
398            }
399            // @set is an explicit unordered set
400            JSONLD_SET => {
401                if let Value::Array(arr) = value {
402                    self.serialize_keyed_array(TOON_SET, arr, depth, output)?;
403                }
404            }
405            // @value, @language - serialize as normal keys (value nodes use standard TOON object syntax)
406            // Note: @type is handled above as it can be either a node type or a value node datatype
407            JSONLD_VALUE | JSONLD_LANGUAGE => {
408                output.push_str(&format!("{}{}: ", indent, display_key));
409                self.serialize_value(value, depth, output)?;
410                output.push('\n');
411            }
412            // @included contains an array of included nodes
413            JSONLD_INCLUDED => {
414                if let Value::Array(arr) = value {
415                    self.serialize_keyed_array(TOON_INCLUDED, arr, depth, output)?;
416                } else {
417                    output.push_str(&format!("{}{}:\n", indent, TOON_INCLUDED));
418                    self.serialize_value(value, depth + 1, output)?;
419                }
420            }
421            // @index is a simple string value
422            JSONLD_INDEX => {
423                output.push_str(&format!("{}{}: ", indent, TOON_INDEX));
424                self.serialize_value(value, depth, output)?;
425                output.push('\n');
426            }
427            // @nest contains nested properties object
428            JSONLD_NEST => {
429                output.push_str(&format!("{}{}:\n", indent, TOON_NEST));
430                if let Value::Object(nest_obj) = value {
431                    self.serialize_object(nest_obj, depth + 1, output)?;
432                }
433            }
434            // @container specifies container type
435            JSONLD_CONTAINER => match value {
436                Value::Array(arr) => {
437                    self.serialize_keyed_array(TOON_CONTAINER, arr, depth, output)?;
438                }
439                _ => {
440                    output.push_str(&format!("{}{}: ", indent, TOON_CONTAINER));
441                    self.serialize_value(value, depth, output)?;
442                    output.push('\n');
443                }
444            },
445            // @direction specifies text direction (ltr/rtl)
446            JSONLD_DIRECTION => {
447                output.push_str(&format!("{}{}: ", indent, TOON_DIRECTION));
448                self.serialize_value(value, depth, output)?;
449                output.push('\n');
450            }
451            // @import specifies external context to import
452            JSONLD_IMPORT => {
453                output.push_str(&format!("{}{}: ", indent, TOON_IMPORT));
454                self.serialize_value(value, depth, output)?;
455                output.push('\n');
456            }
457            // @json marks a JSON literal
458            JSONLD_JSON => {
459                output.push_str(&format!("{}{}: ", indent, TOON_JSON));
460                // Serialize as JSON string
461                let json_str = serde_json::to_string(value).unwrap_or_else(|_| "null".to_string());
462                output.push_str(&format!("\"{}\"", json_str.replace('"', "\\\"")));
463                output.push('\n');
464            }
465            // @none is the default index value
466            JSONLD_NONE => {
467                output.push_str(&format!("{}{}: ", indent, TOON_NONE));
468                self.serialize_value(value, depth, output)?;
469                output.push('\n');
470            }
471            // @prefix flag
472            JSONLD_PREFIX => {
473                output.push_str(&format!("{}{}: ", indent, TOON_PREFIX));
474                self.serialize_value(value, depth, output)?;
475                output.push('\n');
476            }
477            // @propagate flag
478            JSONLD_PROPAGATE => {
479                output.push_str(&format!("{}{}: ", indent, TOON_PROPAGATE));
480                self.serialize_value(value, depth, output)?;
481                output.push('\n');
482            }
483            // @protected flag
484            JSONLD_PROTECTED => {
485                output.push_str(&format!("{}{}: ", indent, TOON_PROTECTED));
486                self.serialize_value(value, depth, output)?;
487                output.push('\n');
488            }
489            // @version specifies JSON-LD version
490            JSONLD_VERSION => {
491                output.push_str(&format!("{}{}: ", indent, TOON_VERSION));
492                self.serialize_value(value, depth, output)?;
493                output.push('\n');
494            }
495            // Regular keys
496            _ => {
497                let compact_key = self.context.compact_uri(key);
498                match value {
499                    Value::Array(arr) => {
500                        self.serialize_keyed_array(&compact_key, arr, depth, output)?;
501                    }
502                    Value::Object(nested) => {
503                        output.push_str(&format!("{}{}:\n", indent, compact_key));
504                        self.serialize_object(nested, depth + 1, output)?;
505                    }
506                    _ => {
507                        output.push_str(&format!("{}{}: ", indent, compact_key));
508                        self.serialize_value(value, depth, output)?;
509                        output.push('\n');
510                    }
511                }
512            }
513        }
514        Ok(())
515    }
516
517    /// Get display key for JSON-LD keywords.
518    fn get_display_key(&self, key: &str) -> String {
519        // Use the centralized keyword function, but fall back to context compaction
520        // for non-keyword keys
521        if let Some(toon_key) = get_toon_keyword(key) {
522            toon_key.to_string()
523        } else {
524            self.context.compact_uri(key)
525        }
526    }
527
528    /// Serialize @context in a compact format.
529    fn serialize_context(&self, value: &Value, depth: usize, output: &mut String) -> Result<()> {
530        let indent = self.make_indent(depth);
531        output.push_str(&format!("{}{}:\n", indent, JSONLD_CONTEXT));
532
533        match value {
534            Value::Object(ctx) => {
535                let ctx_indent = self.make_indent(depth + 1);
536                for (prefix, uri) in ctx {
537                    output.push_str(&format!("{}{}: ", ctx_indent, prefix));
538                    self.serialize_value(uri, depth + 1, output)?;
539                    output.push('\n');
540                }
541            }
542            Value::Array(arr) => {
543                // Multiple contexts
544                for item in arr {
545                    self.serialize_context(item, depth + 1, output)?;
546                }
547            }
548            Value::String(s) => {
549                let ctx_indent = self.make_indent(depth + 1);
550                output.push_str(&format!("{}{}\n", ctx_indent, self.quote_if_needed(s)));
551            }
552            _ => {
553                self.serialize_value(value, depth + 1, output)?;
554                output.push('\n');
555            }
556        }
557        Ok(())
558    }
559
560    /// Serialize a keyed array (array with a key prefix).
561    pub fn serialize_keyed_array(
562        &self,
563        key: &str,
564        arr: &[Value],
565        depth: usize,
566        output: &mut String,
567    ) -> Result<()> {
568        let indent = self.make_indent(depth);
569
570        if arr.is_empty() {
571            output.push_str(&format!("{}{}[0]:\n", indent, key));
572            return Ok(());
573        }
574
575        // Check if this is an array of objects (can use tabular format)
576        if let Some(fields) = self.get_tabular_fields(arr) {
577            // Calculate sparsity and decide whether to partition
578            if self.enable_shape_partitioning {
579                let sparsity = self.calculate_sparsity(arr, &fields);
580
581                // If sparsity exceeds threshold, use shape-based partitioning
582                if sparsity > SPARSITY_THRESHOLD {
583                    return self.serialize_partitioned_array(key, arr, depth, output);
584                }
585            }
586
587            // Otherwise use standard union schema approach
588            self.serialize_tabular_array(key, arr, &fields, depth, output)?;
589        } else if self.is_primitive_array(arr) {
590            self.serialize_primitive_array(key, arr, depth, output)?;
591        } else {
592            // Mixed array - serialize each element indented
593            output.push_str(&format!("{}{}[{}]:\n", indent, key, arr.len()));
594            for item in arr {
595                let item_indent = self.make_indent(depth + 1);
596                output.push_str(&item_indent);
597                output.push_str("- ");
598                match item {
599                    Value::Object(obj) => {
600                        output.push('\n');
601                        self.serialize_object(obj, depth + 2, output)?;
602                    }
603                    _ => {
604                        self.serialize_value(item, depth + 1, output)?;
605                        output.push('\n');
606                    }
607                }
608            }
609        }
610        Ok(())
611    }
612
613    /// Get tabular fields from an array of objects.
614    ///
615    /// Returns `Some(fields)` with the union of all keys if all elements are objects.
616    /// Missing fields in individual objects will be filled with null during serialization.
617    fn get_tabular_fields(&self, arr: &[Value]) -> Option<Vec<String>> {
618        if arr.is_empty() {
619            return None;
620        }
621
622        // Collect union of all keys from all objects
623        let mut all_keys: HashSet<String> = HashSet::new();
624
625        for item in arr {
626            match item {
627                Value::Object(obj) => {
628                    for key in obj.keys() {
629                        all_keys.insert(key.clone());
630                    }
631                }
632                // If any element is not an object, cannot use tabular format
633                _ => return None,
634            }
635        }
636
637        if all_keys.is_empty() {
638            return None;
639        }
640
641        // Return keys in consistent order (sorted, with keywords first)
642        let mut fields: Vec<String> = all_keys.into_iter().collect();
643        fields.sort_by(|a, b| {
644            keyword_order(a)
645                .cmp(&keyword_order(b))
646                .then_with(|| a.cmp(b))
647        });
648        Some(fields)
649    }
650
651    /// Check if array contains only primitives.
652    fn is_primitive_array(&self, arr: &[Value]) -> bool {
653        arr.iter().all(|v| {
654            matches!(
655                v,
656                Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_)
657            )
658        })
659    }
660
661    /// Serialize a tabular array: key[N]{field1,field2}:
662    fn serialize_tabular_array(
663        &self,
664        key: &str,
665        arr: &[Value],
666        fields: &[String],
667        depth: usize,
668        output: &mut String,
669    ) -> Result<()> {
670        let indent = self.make_indent(depth);
671        let row_indent = self.make_indent(depth + 1);
672
673        // Compact field names
674        let compact_fields: Vec<String> =
675            fields.iter().map(|f| self.context.compact_uri(f)).collect();
676
677        // Write header: key[N]{field1,field2}:
678        output.push_str(&format!(
679            "{}{}[{}]{{{}}}:\n",
680            indent,
681            key,
682            arr.len(),
683            compact_fields.join(",")
684        ));
685
686        // Write CSV rows
687        for item in arr {
688            if let Value::Object(obj) = item {
689                let values: Vec<String> = fields
690                    .iter()
691                    .map(|field| {
692                        obj.get(field)
693                            .map(|v| self.value_to_csv_cell(v))
694                            .unwrap_or_else(|| "null".to_string())
695                    })
696                    .collect();
697                output.push_str(&format!("{}{}\n", row_indent, values.join(", ")));
698            }
699        }
700
701        Ok(())
702    }
703
704    /// Serialize a primitive array.
705    fn serialize_primitive_array(
706        &self,
707        key: &str,
708        arr: &[Value],
709        depth: usize,
710        output: &mut String,
711    ) -> Result<()> {
712        let indent = self.make_indent(depth);
713
714        let values: Vec<String> = arr.iter().map(|v| self.value_to_csv_cell(v)).collect();
715        let inline = values.join(", ");
716
717        // If short enough, keep on one line
718        if inline.len() < MAX_INLINE_ARRAY_LENGTH {
719            output.push_str(&format!("{}{}[{}]: {}\n", indent, key, arr.len(), inline));
720        } else {
721            // Multi-line format
722            output.push_str(&format!("{}{}[{}]:\n", indent, key, arr.len()));
723            let row_indent = self.make_indent(depth + 1);
724            for value in &values {
725                output.push_str(&format!("{}{}\n", row_indent, value));
726            }
727        }
728
729        Ok(())
730    }
731
732    /// Convert a value to a CSV cell string.
733    fn value_to_csv_cell(&self, value: &Value) -> String {
734        match value {
735            Value::Null => "null".to_string(),
736            Value::Bool(b) => if *b { "true" } else { "false" }.to_string(),
737            Value::Number(n) => n.to_string(),
738            Value::String(s) => self.quote_if_needed(s),
739            Value::Array(_) | Value::Object(_) => {
740                // Nested structures in CSV cells - serialize as JSON and quote
741                let json = serde_json::to_string(value).unwrap_or_else(|_| "null".to_string());
742                format!("\"{}\"", json.replace('"', "\\\""))
743            }
744        }
745    }
746
747    /// Quote a string if it contains special characters.
748    fn quote_if_needed(&self, s: &str) -> String {
749        if s.is_empty() {
750            return "\"\"".to_string();
751        }
752        if NEEDS_QUOTE_REGEX.is_match(s) {
753            format!("\"{}\"", s.replace('"', "\\\""))
754        } else {
755            s.to_string()
756        }
757    }
758
759    /// Create indentation string for given depth.
760    #[inline]
761    fn make_indent(&self, depth: usize) -> String {
762        " ".repeat(depth * self.indent_size)
763    }
764
765    /// Calculate sparsity of an array with given fields.
766    /// Returns the ratio of null values to total cells.
767    fn calculate_sparsity(&self, arr: &[Value], fields: &[String]) -> f64 {
768        if arr.is_empty() || fields.is_empty() {
769            return 0.0;
770        }
771
772        let mut null_count = 0;
773        let total_cells = arr.len() * fields.len();
774
775        for item in arr {
776            if let Value::Object(obj) = item {
777                for field in fields {
778                    if !obj.contains_key(field) {
779                        null_count += 1;
780                    }
781                }
782            }
783        }
784
785        null_count as f64 / total_cells as f64
786    }
787
788    /// Generate a deterministic signature for an entity based on its keys.
789    /// Keys are sorted alphabetically to ensure consistency.
790    fn entity_signature(&self, obj: &Map<String, Value>) -> String {
791        let mut keys: Vec<&String> = obj.keys().collect();
792        keys.sort();
793        keys.into_iter()
794            .map(|k| k.as_str())
795            .collect::<Vec<&str>>()
796            .join("|")
797    }
798
799    /// Partition array entities by their shape signature.
800    /// Returns a Vec of (signature, fields, entities) tuples.
801    fn partition_by_shape<'a>(
802        &self,
803        arr: &'a [Value],
804    ) -> Vec<(String, Vec<String>, Vec<&'a Value>)> {
805        use std::collections::HashMap;
806
807        let mut shape_map: HashMap<String, Vec<&Value>> = HashMap::new();
808
809        // Group entities by signature
810        for item in arr {
811            if let Value::Object(obj) = item {
812                let sig = self.entity_signature(obj);
813                shape_map.entry(sig).or_default().push(item);
814            }
815        }
816
817        // Convert to sorted output format
818        let mut partitions: Vec<(String, Vec<String>, Vec<&Value>)> = shape_map
819            .into_iter()
820            .map(|(sig, entities)| {
821                let fields: Vec<String> = sig.split('|').map(String::from).collect();
822                (sig, fields, entities)
823            })
824            .collect();
825
826        // Sort by entity count (largest groups first) for better readability
827        partitions.sort_by(|a, b| b.2.len().cmp(&a.2.len()));
828
829        partitions
830    }
831
832    /// Serialize a keyed array using shape-based partitioning.
833    /// Emits multiple array blocks, each with entities of the same shape.
834    fn serialize_partitioned_array(
835        &self,
836        key: &str,
837        arr: &[Value],
838        depth: usize,
839        output: &mut String,
840    ) -> Result<()> {
841        let partitions = self.partition_by_shape(arr);
842        let indent = self.make_indent(depth);
843        let row_indent = self.make_indent(depth + 1);
844
845        for (idx, (_sig, fields, entities)) in partitions.iter().enumerate() {
846            // Add spacing between partitions (except before first)
847            if idx > 0 {
848                output.push('\n');
849            }
850
851            // Compact field names
852            let compact_fields: Vec<String> =
853                fields.iter().map(|f| self.context.compact_uri(f)).collect();
854
855            // Write header: key[N]{field1,field2}:
856            output.push_str(&format!(
857                "{}{}[{}]{{{}}}:\n",
858                indent,
859                key,
860                entities.len(),
861                compact_fields.join(",")
862            ));
863
864            // Write CSV rows
865            for entity in entities {
866                if let Value::Object(obj) = entity {
867                    let values: Vec<String> = fields
868                        .iter()
869                        .map(|field| {
870                            obj.get(field)
871                                .map(|v| self.value_to_csv_cell(v))
872                                .unwrap_or_else(|| "null".to_string())
873                        })
874                        .collect();
875                    output.push_str(&format!("{}{}\n", row_indent, values.join(", ")));
876                }
877            }
878        }
879
880        Ok(())
881    }
882}
883
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A serializer from `new()` reports the default indent and an empty context.
    #[test]
    fn test_new_serializer() {
        let fresh = ToonSerializer::new();
        assert_eq!(fresh.indent_size(), DEFAULT_INDENT_SIZE);
        assert!(fresh.context().is_empty());
    }

    /// `with_indent_size` is reflected by the `indent_size` getter.
    #[test]
    fn test_with_indent_size() {
        let wide = ToonSerializer::new().with_indent_size(4);
        assert_eq!(wide.indent_size(), 4);
    }

    /// A context carrying a prefix is retained by `with_context`.
    #[test]
    fn test_with_context() {
        let mut prefixes = JsonLdContext::new();
        prefixes.add_prefix("foaf", "http://xmlns.com/foaf/0.1/");

        let configured = ToonSerializer::new().with_context(prefixes);
        assert!(configured.context().has_prefixes());
    }

    /// Strings, numbers, booleans and null are written as `key: value` pairs.
    #[test]
    fn test_serialize_primitives() {
        let doc = json!({
            "name": "Alice",
            "age": 30,
            "active": true,
            "score": null
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        let expected_pairs = ["name: Alice", "age: 30", "active: true", "score: null"];
        for pair in expected_pairs.iter() {
            assert!(out.contains(*pair));
        }
    }

    /// An array of primitives gets a `key[N]:` header and keeps its items.
    #[test]
    fn test_serialize_primitive_array() {
        let doc = json!({
            "tags": ["rust", "wasm", "python"]
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();
        assert!(out.contains("tags[3]:"));
        assert!(out.contains("rust"));
    }

    /// An array of uniform objects gets a tabular `key[N]{...}` header.
    #[test]
    fn test_serialize_tabular_array() {
        let doc = json!({
            "people": [
                {"name": "Alice", "age": 30},
                {"name": "Bob", "age": 25}
            ]
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        let expected_fragments = ["people[2]{", "Alice", "Bob"];
        for fragment in expected_fragments.iter() {
            assert!(out.contains(*fragment));
        }
    }

    /// An empty array is written with a zero length marker.
    #[test]
    fn test_serialize_empty_array() {
        let doc = json!({
            "items": []
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();
        assert!(out.contains("items[0]:"));
    }

    /// Nested objects produce a `key:` line per level and the leaf pair.
    #[test]
    fn test_serialize_nested_object() {
        let doc = json!({
            "person": {
                "name": "Alice",
                "address": {
                    "city": "Seattle"
                }
            }
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        let expected_fragments = ["person:", "address:", "city: Seattle"];
        for fragment in expected_fragments.iter() {
            assert!(out.contains(*fragment));
        }
    }

    /// Quoting is applied for delimiters, the empty string and edge whitespace.
    #[test]
    fn test_quote_if_needed() {
        let quoter = ToonSerializer::new();

        // (raw input, expected output)
        let cases = [
            ("hello", "hello"),
            ("hello, world", "\"hello, world\""),
            ("key: value", "\"key: value\""),
            ("a|b", "\"a|b\""),
            ("", "\"\""),
            (" leading", "\" leading\""),
            ("trailing ", "\"trailing \""),
        ];

        for &(raw, expected) in cases.iter() {
            assert_eq!(quoter.quote_if_needed(raw), expected);
        }
    }

    /// A full IRI key is shortened using a prefix from the context.
    #[test]
    fn test_serialize_with_context_compaction() {
        let mut prefixes = JsonLdContext::new();
        prefixes.add_prefix("foaf", "http://xmlns.com/foaf/0.1/");

        let doc = json!({
            "http://xmlns.com/foaf/0.1/name": "Alice"
        });

        let out = ToonSerializer::new()
            .with_context(prefixes)
            .serialize(&doc)
            .unwrap();
        assert!(out.contains("foaf:name"));
    }

    /// A value node with a language tag keeps both its keywords and values.
    #[test]
    fn test_serialize_value_node_with_language() {
        let doc = json!({
            "title": {
                "@value": "Bonjour",
                "@language": "fr"
            }
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        // Value nodes are written with the regular object syntax.
        let expected_fragments = ["@value", "Bonjour", "@language", "fr"];
        for fragment in expected_fragments.iter() {
            assert!(out.contains(*fragment));
        }
    }

    /// A typed value node keeps its keywords and has its datatype compacted.
    #[test]
    fn test_serialize_value_node_with_type() {
        let mut prefixes = JsonLdContext::new();
        prefixes.add_prefix("xsd", "http://www.w3.org/2001/XMLSchema#");

        let doc = json!({
            "date": {
                "@value": "2024-01-15",
                "@type": "http://www.w3.org/2001/XMLSchema#date"
            }
        });

        let out = ToonSerializer::new()
            .with_context(prefixes)
            .serialize(&doc)
            .unwrap();

        // Value nodes are written with the regular object syntax.
        let expected_fragments = ["@value", "2024-01-15", "@type", "xsd:date"];
        for fragment in expected_fragments.iter() {
            assert!(out.contains(*fragment));
        }
    }

    /// An inline `@context` object is emitted together with its prefix entries.
    #[test]
    fn test_serialize_context() {
        let doc = json!({
            "@context": {
                "foaf": "http://xmlns.com/foaf/0.1/",
                "schema": "http://schema.org/"
            },
            "name": "Test"
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        let expected_fragments = ["@context:", "foaf:", "schema:"];
        for fragment in expected_fragments.iter() {
            assert!(out.contains(*fragment));
        }
    }

    /// A uniform `@graph` array is written as a single block of two entries.
    #[test]
    fn test_serialize_graph() {
        let doc = json!({
            "@graph": [
                {"@id": "ex:1", "name": "Alice"},
                {"@id": "ex:2", "name": "Bob"}
            ]
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();
        assert!(out.contains("@graph[2]"));
    }

    /// `serialize_json` accepts raw JSON text.
    #[test]
    fn test_serialize_json_string() {
        let raw_json = r#"{"name": "Alice", "age": 30}"#;

        let out = ToonSerializer::new().serialize_json(raw_json).unwrap();
        assert!(out.contains("name: Alice"));
        assert!(out.contains("age: 30"));
    }

    /// With partitioning off, the header is the union of keys and gaps are null.
    #[test]
    fn test_tabular_array_union_of_keys() {
        // Partitioning is switched off so the union schema is exercised directly.
        let union_only = ToonSerializer::new().with_shape_partitioning(false);

        let doc = json!({
            "items": [
                {"a": 1, "b": 2},
                {"a": 3, "c": 4}
            ]
        });

        let out = union_only.serialize(&doc).unwrap();

        // Header lists every key seen in any row.
        assert!(out.contains("items[2]{a,b,c}:"));
        // Cells for keys a row does not have are written as null.
        assert!(out.contains("1, 2, null"));
        assert!(out.contains("3, null, 4"));
    }

    /// With partitioning off, even a very sparse array stays in one table.
    #[test]
    fn test_shape_partitioning_disabled() {
        let union_only = ToonSerializer::new().with_shape_partitioning(false);

        let doc = json!({
            "items": [
                {"a": 1, "b": 2},
                {"a": 3, "c": 4},
                {"x": 5, "y": 6}
            ]
        });

        let out = union_only.serialize(&doc).unwrap();

        // One block whose header is the union of all keys, sparsity notwithstanding.
        assert!(out.contains("items[3]{a,b,c,x,y}:"));
    }

    /// Rows that all share one shape are never split into partitions.
    #[test]
    fn test_shape_partitioning_low_sparsity() {
        let doc = json!({
            "items": [
                {"a": 1, "b": 2},
                {"a": 3, "b": 4},
                {"a": 5, "b": 6}
            ]
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        // A single table holding all three rows...
        assert!(out.contains("items[3]{a,b}:"));
        // ...and no single-row partition block.
        assert!(!out.contains("items[1]"));
    }

    /// Rows with mostly disjoint keys are split into one block per shape.
    #[test]
    fn test_shape_partitioning_high_sparsity() {
        let doc = json!({
            "people": [
                {"@id": "ex:1", "name": "Alice", "age": 30, "email": "alice@example.com"},
                {"@id": "ex:2", "name": "Bob", "phone": "+1234567890", "address": "123 Main St"},
                {"@id": "ex:3", "name": "Carol", "company": "ACME", "role": "Engineer", "salary": 100000}
            ]
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        // Every entity has its own key set, so single-entity blocks must appear.
        assert!(
            out.contains("people[1]"),
            "Should have partitioned blocks with [1]"
        );

        // Exactly one `people[` header per entity.
        let block_total = out.matches("people[").count();
        assert_eq!(
            block_total, 3,
            "Should have 3 separate blocks (one per entity) due to completely different shapes"
        );

        // The combined three-row table must not be present.
        assert!(
            !out.contains("people[3]"),
            "Should not have a single block with all 3 entities"
        );
    }

    /// A graph mixing two entity shapes yields two blocks of two rows each.
    #[test]
    fn test_shape_partitioning_heterogeneous_graph() {
        let doc = json!({
            "@graph": [
                {"@id": "ex:person1", "@type": "Person", "name": "Alice", "age": 30, "email": "alice@example.com"},
                {"@id": "ex:person2", "@type": "Person", "name": "Bob", "age": 25, "email": "bob@example.com"},
                {"@id": "ex:org1", "@type": "Organization", "name": "ACME", "industry": "Tech", "founded": 2000, "employees": 500, "revenue": 10000000},
                {"@id": "ex:org2", "@type": "Organization", "name": "XYZ", "industry": "Finance", "founded": 1995, "employees": 300, "revenue": 5000000}
            ]
        });

        let out = ToonSerializer::new().serialize(&doc).unwrap();

        // The graph is split by shape into two-row blocks.
        assert!(out.contains("@graph[2]"));
        // One block for the Person rows, one for the Organization rows.
        let block_total = out.matches("@graph[").count();
        assert_eq!(block_total, 2, "Should have 2 @graph blocks");
    }

    /// Sparsity is high for disjoint rows and exactly zero for uniform rows.
    #[test]
    fn test_calculate_sparsity() {
        let calc = ToonSerializer::new();

        // Three rows with one key each, measured against three columns.
        let disjoint_rows = vec![json!({"a": 1}), json!({"b": 2}), json!({"c": 3})];
        let disjoint_cols = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let disjoint_ratio = calc.calculate_sparsity(&disjoint_rows, &disjoint_cols);
        assert!(disjoint_ratio > 0.6, "Should have high sparsity (~66%)");

        // Two rows that both fill every column.
        let uniform_rows = vec![json!({"a": 1, "b": 2}), json!({"a": 3, "b": 4})];
        let uniform_cols = vec!["a".to_string(), "b".to_string()];
        let uniform_ratio = calc.calculate_sparsity(&uniform_rows, &uniform_cols);
        assert_eq!(uniform_ratio, 0.0, "Should have zero sparsity");
    }

    /// Signatures depend on the key set only, not on key order or values.
    #[test]
    fn test_entity_signature() {
        let signer = ToonSerializer::new();

        let alice: Map<String, Value> =
            serde_json::from_str(r#"{"name": "Alice", "age": 30}"#).unwrap();
        let bob: Map<String, Value> =
            serde_json::from_str(r#"{"age": 30, "name": "Bob"}"#).unwrap();
        let carol: Map<String, Value> =
            serde_json::from_str(r#"{"name": "Carol", "email": "c@example.com"}"#).unwrap();

        let alice_sig = signer.entity_signature(&alice);
        let bob_sig = signer.entity_signature(&bob);
        let carol_sig = signer.entity_signature(&carol);

        // Identical key sets give identical signatures regardless of key order.
        assert_eq!(alice_sig, bob_sig);
        assert_eq!(alice_sig, "age|name");

        // A different key set gives a different signature.
        assert_ne!(alice_sig, carol_sig);
        assert_eq!(carol_sig, "email|name");
    }

    /// Entities are grouped by shape and the larger group is listed first.
    #[test]
    fn test_partition_by_shape() {
        let partitioner = ToonSerializer::new();

        let rows = vec![
            json!({"a": 1, "b": 2}),
            json!({"a": 3, "b": 4}),
            json!({"x": 5, "y": 6}),
            json!({"x": 7, "y": 8}),
            json!({"x": 9, "y": 10}),
        ];

        let groups = partitioner.partition_by_shape(&rows);

        // Two distinct shapes: {a,b} and {x,y}.
        assert_eq!(groups.len(), 2);

        // The three-row {x,y} group precedes the two-row {a,b} group.
        assert_eq!(groups[0].2.len(), 3);
        assert_eq!(groups[1].2.len(), 2);
    }

    /// Partitioned output parses back into one array holding every entity.
    #[test]
    fn test_shape_partitioning_roundtrip() {
        use crate::ToonParser;

        let source_doc = json!({
            "@graph": [
                {"@id": "ex:1", "@type": "Person", "name": "Alice", "age": 30},
                {"@id": "ex:2", "@type": "Person", "name": "Bob", "age": 25},
                {"@id": "ex:3", "@type": "Org", "name": "ACME", "industry": "Tech"}
            ]
        });

        // Serialize (two shapes are present), then parse the text back.
        let text = ToonSerializer::new().serialize(&source_doc).unwrap();
        let reparsed = ToonParser::new().parse(&text).unwrap();

        // The parsed document exposes `@graph` as an array of all three entities.
        let graph = reparsed.get("@graph").expect("Should have @graph");
        assert!(graph.is_array());
        let members = graph.as_array().unwrap();
        assert_eq!(
            members.len(),
            3,
            "Should have all 3 entities after parsing"
        );

        // Each original `@id` is found among the parsed entities.
        let expected_ids = ["ex:1", "ex:2", "ex:3"];
        for wanted in expected_ids.iter() {
            assert!(members
                .iter()
                .any(|member| member.get("@id").and_then(|id| id.as_str()) == Some(*wanted)));
        }
    }
}