base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod frame;
6pub mod parsers;
7pub mod serializers;
8pub mod stele;
9pub mod stele_analyzer;
10pub mod types;
11
12#[cfg(test)]
13mod edge_cases;
14
15// Re-export key types for convenience
16pub use binary_packer::pack;
17pub use binary_unpacker::unpack;
18pub use compression::SchemaCompressionAlgo;
19pub use frame::{decode_framed, encode_framed};
20pub use parsers::{InputParser, JsonParser};
21// MarkdownDocParser used internally by encode_markdown_stele_* functions
22pub use serializers::{JsonSerializer, OutputSerializer};
23pub use types::{
24    FieldDef, FieldType, IntermediateRepresentation, SchemaError, SchemaHeader, SchemaValue,
25};
26
27// Re-export stele functions for library users
28#[allow(unused_imports)]
29pub use stele::{parse as parse_stele, serialize as serialize_stele};
30
31/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
32///
33/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
34/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
35/// alphabet of box-drawing and geometric shapes.
36///
37/// # Arguments
38///
39/// * `json` - JSON string to encode (must be object or array of objects)
40/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
41///
42/// # Returns
43///
44/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
45///
46/// # Errors
47///
48/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
49/// * `SchemaError::Compression` - Compression failure
50///
51/// # Example
52///
53/// ```ignore
54/// use base_d::{encode_schema, SchemaCompressionAlgo};
55///
56/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
57///
58/// // Without compression
59/// let encoded = encode_schema(json, None)?;
60/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
61///
62/// // With brotli compression
63/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
64/// ```
65///
66/// # See Also
67///
68/// * [`decode_schema`] - Decode schema format back to JSON
69/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
70pub fn encode_schema(
71    json: &str,
72    compress: Option<SchemaCompressionAlgo>,
73) -> Result<String, SchemaError> {
74    use parsers::{InputParser, JsonParser};
75
76    let ir = JsonParser::parse(json)?;
77    let binary = pack(&ir);
78    let compressed = compression::compress_with_prefix(&binary, compress)?;
79    Ok(frame::encode_framed(&compressed))
80}
81
82/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
83///
84/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
85/// display-safe wire format. Automatically detects and handles compression.
86///
87/// # Arguments
88///
89/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
90/// * `pretty` - Pretty-print JSON output with indentation
91///
92/// # Returns
93///
94/// Returns the decoded JSON string (minified or pretty-printed)
95///
96/// # Errors
97///
98/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
99/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
100/// * `SchemaError::Decompression` - Decompression failure
101/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
102/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
103///
104/// # Example
105///
106/// ```ignore
107/// use base_d::decode_schema;
108///
109/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
110///
111/// // Minified output
112/// let json = decode_schema(encoded, false)?;
113/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
114///
115/// // Pretty-printed output
116/// let pretty = decode_schema(encoded, true)?;
117/// println!("{}", pretty);
118/// // {
119/// //   "users": [
120/// //     {"id": 1, "name": "alice"}
121/// //   ]
122/// // }
123/// ```
124///
125/// # See Also
126///
127/// * [`encode_schema`] - Encode JSON to schema format
128/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
129pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
130    use serializers::{JsonSerializer, OutputSerializer};
131
132    let compressed = frame::decode_framed(encoded)?;
133    let binary = compression::decompress_with_prefix(&compressed)?;
134    let ir = unpack(&binary)?;
135    JsonSerializer::serialize(&ir, pretty)
136}
137
138/// Encode JSON to stele format: JSON → IR → stele
139///
140/// Transforms JSON into a model-readable structured format using Unicode delimiters.
141/// Unlike carrier98 (opaque binary), stele is designed for models to parse directly.
142///
143/// # Format
144///
145/// ```text
146/// @{root}┃{field}:{type}┃{field}:{type}...
147/// ◉{value}┃{value}┃{value}...
148/// ```
149///
150/// # Example
151///
152/// ```ignore
153/// use base_d::encode_stele;
154///
155/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
156/// let stele = encode_stele(json)?;
157/// // @users┃id:int┃name:str
158/// // ◉1┃alice
159/// ```
160pub fn encode_stele(json: &str, minify: bool) -> Result<String, SchemaError> {
161    encode_stele_with_options(json, minify, true, true)
162}
163
164pub fn encode_stele_minified(json: &str) -> Result<String, SchemaError> {
165    encode_stele_with_options(json, true, true, true)
166}
167
168/// Encode JSON to stele without tokenization (human-readable field names)
169pub fn encode_stele_readable(json: &str, minify: bool) -> Result<String, SchemaError> {
170    encode_stele_with_options(json, minify, false, false)
171}
172
173/// Encode JSON to stele with field tokenization only (no value dictionary)
174pub fn encode_stele_light(json: &str, minify: bool) -> Result<String, SchemaError> {
175    encode_stele_with_options(json, minify, true, false)
176}
177
/// Encode JSON to stele path mode (one line per leaf value)
///
/// Thin wrapper: the JSON text is handed unchanged to `stele::serialize_path_mode`
/// and that function's result (or error) is returned as-is.
pub fn encode_stele_path(json: &str) -> Result<String, SchemaError> {
    stele::serialize_path_mode(json)
}
182
/// Decode stele path mode to JSON
///
/// Thin wrapper: the input is handed unchanged to `stele::parse_path_mode` and that
/// function's result (or error) is returned as-is. Counterpart of
/// [`encode_stele_path`].
pub fn decode_stele_path(path_input: &str) -> Result<String, SchemaError> {
    stele::parse_path_mode(path_input)
}
187
188/// Encode JSON to ASCII inline stele format
189pub fn encode_stele_ascii(json: &str) -> Result<String, SchemaError> {
190    use parsers::{InputParser, JsonParser};
191    let ir = JsonParser::parse(json)?;
192    stele::serialize_ascii(&ir)
193}
194
195/// Encode markdown document to ASCII inline stele format
196pub fn encode_markdown_stele_ascii(markdown: &str) -> Result<String, SchemaError> {
197    use parsers::{InputParser, MarkdownDocParser};
198    let ir = MarkdownDocParser::parse(markdown)?;
199    stele::serialize_ascii(&ir)
200}
201
202/// Encode markdown document to markdown-like inline stele format
203/// Uses #1-#6 for headers, -1/-2 for lists, preserves markdown syntax patterns
204pub fn encode_markdown_stele_markdown(markdown: &str) -> Result<String, SchemaError> {
205    use parsers::{InputParser, MarkdownDocParser};
206    let ir = MarkdownDocParser::parse(markdown)?;
207    stele::serialize_markdown(&ir)
208}
209
210/// Encode markdown document to stele format: markdown → IR → stele
211///
212/// Parses a full markdown document into a simplified block-based representation,
213/// then encodes to stele format for model-readable output.
214pub fn encode_markdown_stele(markdown: &str, minify: bool) -> Result<String, SchemaError> {
215    encode_markdown_stele_with_options(markdown, minify, true, true)
216}
217
218/// Encode markdown to stele without tokenization (human-readable)
219pub fn encode_markdown_stele_readable(markdown: &str, minify: bool) -> Result<String, SchemaError> {
220    encode_markdown_stele_with_options(markdown, minify, false, false)
221}
222
223/// Encode markdown to stele with field tokenization only (no value dictionary)
224pub fn encode_markdown_stele_light(markdown: &str, minify: bool) -> Result<String, SchemaError> {
225    encode_markdown_stele_with_options(markdown, minify, true, false)
226}
227
228fn encode_markdown_stele_with_options(
229    markdown: &str,
230    minify: bool,
231    tokenize_fields: bool,
232    tokenize_values: bool,
233) -> Result<String, SchemaError> {
234    use parsers::{InputParser, MarkdownDocParser};
235
236    let ir = MarkdownDocParser::parse(markdown)?;
237    match (tokenize_fields, tokenize_values) {
238        (true, true) => stele::serialize(&ir, minify),
239        (true, false) => stele::serialize_light(&ir, minify),
240        (false, false) => stele::serialize_readable(&ir, minify),
241        (false, true) => {
242            // Invalid: can't tokenize values without tokenizing fields
243            stele::serialize_readable(&ir, minify)
244        }
245    }
246}
247
248fn encode_stele_with_options(
249    json: &str,
250    minify: bool,
251    tokenize_fields: bool,
252    tokenize_values: bool,
253) -> Result<String, SchemaError> {
254    use parsers::{InputParser, JsonParser};
255
256    let ir = JsonParser::parse(json)?;
257    match (tokenize_fields, tokenize_values) {
258        (true, true) => stele::serialize(&ir, minify),
259        (true, false) => stele::serialize_light(&ir, minify),
260        (false, false) => stele::serialize_readable(&ir, minify),
261        (false, true) => {
262            // Invalid: can't tokenize values without tokenizing fields
263            stele::serialize_readable(&ir, minify)
264        }
265    }
266}
267
268/// Decode stele format to JSON: stele → IR → JSON
269///
270/// Reverses the stele encoding to reconstruct JSON from the model-readable format.
271///
272/// # Example
273///
274/// ```ignore
275/// use base_d::decode_stele;
276///
277/// let stele = "@users┃id:int┃name:str\n◉1┃alice";
278/// let json = decode_stele(stele, false)?;
279/// // {"users":[{"id":1,"name":"alice"}]}
280/// ```
281pub fn decode_stele(stele_input: &str, pretty: bool) -> Result<String, SchemaError> {
282    use serializers::{JsonSerializer, OutputSerializer};
283
284    let ir = stele::parse(stele_input)?;
285    JsonSerializer::serialize(&ir, pretty)
286}
287
/// End-to-end tests for the schema pipeline: binary pack/unpack, JSON round trips,
/// the framed `encode_schema`/`decode_schema` API, and stele round trips.
#[cfg(test)]
mod integration_tests {
    use super::*;
    use crate::encoders::algorithms::schema::types::{
        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
        SchemaHeader, SchemaValue,
    };
    use parsers::{InputParser, JsonParser};
    use serializers::{JsonSerializer, OutputSerializer};

    // ------------------------------------------------------------------
    // Shared helpers
    // ------------------------------------------------------------------

    /// Parse `text` as JSON, panicking with the offending text if it is not valid.
    fn json_value(text: &str) -> serde_json::Value {
        serde_json::from_str(text)
            .unwrap_or_else(|err| panic!("invalid JSON {:?}: {}", text, err))
    }

    /// Assert that two JSON documents are equal as values (key order is irrelevant).
    fn assert_json_eq(expected: &str, actual: &str) {
        assert_eq!(json_value(expected), json_value(actual));
    }

    /// Assert that packing `original` to binary and unpacking it yields an equal IR.
    fn assert_binary_round_trip(original: &IntermediateRepresentation) {
        let unpacked = unpack(&pack(original)).unwrap();
        assert_eq!(original, &unpacked);
    }

    /// JSON → IR → binary → IR → minified JSON.
    fn json_through_binary(input: &str) -> String {
        let ir = JsonParser::parse(input).unwrap();
        let restored = unpack(&pack(&ir)).unwrap();
        JsonSerializer::serialize(&restored, false).unwrap()
    }

    /// IR → tokenized stele → IR → minified JSON.
    fn json_through_stele(ir: &IntermediateRepresentation) -> String {
        let tokenized = stele::serialize(ir, false).unwrap();
        let restored = stele::parse(&tokenized).unwrap();
        JsonSerializer::serialize(&restored, false).unwrap()
    }

    /// JSON → framed schema string → minified JSON, via the public API.
    fn json_through_schema(input: &str, compress: Option<SchemaCompressionAlgo>) -> String {
        let encoded = encode_schema(input, compress).unwrap();
        decode_schema(&encoded, false).unwrap()
    }

    // ------------------------------------------------------------------
    // Binary pack/unpack round trips on hand-built IRs
    // ------------------------------------------------------------------

    #[test]
    fn test_round_trip_simple() {
        let header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::String("Alice".to_string()),
            SchemaValue::U64(2),
            SchemaValue::String("Bob".to_string()),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_all_types() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("u64_field", FieldType::U64),
                FieldDef::new("i64_field", FieldType::I64),
                FieldDef::new("f64_field", FieldType::F64),
                FieldDef::new("string_field", FieldType::String),
                FieldDef::new("bool_field", FieldType::Bool),
            ],
        );
        let values = vec![
            SchemaValue::U64(42),
            SchemaValue::I64(-42),
            SchemaValue::F64(std::f64::consts::PI),
            SchemaValue::String("test".to_string()),
            SchemaValue::Bool(true),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_with_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);

        let original =
            IntermediateRepresentation::new(header, vec![SchemaValue::U64(42)]).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_with_nulls() {
        let mut header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );

        // Mark second value as null (row 0, field 1)
        let total_values: usize = 2 * 2; // 2 rows * 2 fields = 4 values
        let bitmap_bytes = total_values.div_ceil(8); // 1 byte
        let mut null_bitmap = vec![0u8; bitmap_bytes];
        null_bitmap[0] |= 1 << 1; // Set bit 1 (second value)

        header.null_bitmap = Some(null_bitmap);
        header.set_flag(FLAG_HAS_NULLS);

        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::Null, // This is marked as null in bitmap
            SchemaValue::U64(2),
            SchemaValue::String("Bob".to_string()),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "tags",
                FieldType::Array(Box::new(FieldType::U64)),
            )],
        );
        let values = vec![SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ])];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_large_values() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("large_u64", FieldType::U64),
                FieldDef::new("large_i64", FieldType::I64),
            ],
        );
        let values = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_empty_string() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("name", FieldType::String)]);
        let values = vec![SchemaValue::String("".to_string())];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_round_trip_multiple_rows() {
        let header = SchemaHeader::new(
            3,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("score", FieldType::F64),
                FieldDef::new("active", FieldType::Bool),
            ],
        );
        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::F64(95.5),
            SchemaValue::Bool(true),
            SchemaValue::U64(2),
            SchemaValue::F64(87.3),
            SchemaValue::Bool(false),
            SchemaValue::U64(3),
            SchemaValue::F64(92.1),
            SchemaValue::Bool(true),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_binary_round_trip(&original);
    }

    #[test]
    fn test_invalid_data() {
        // Empty data
        assert!(matches!(
            unpack(&[]),
            Err(SchemaError::UnexpectedEndOfData { .. })
        ));

        // Truncated data
        assert!(unpack(&[0, 1, 2]).is_err());
    }

    // ------------------------------------------------------------------
    // JSON → IR → binary → IR → JSON
    // ------------------------------------------------------------------

    #[test]
    fn test_json_full_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let ir = JsonParser::parse(input).unwrap();
        let binary = pack(&ir);
        // Also exercise the prefix-based compression layer with no algorithm selected.
        let compressed = compression::compress_with_prefix(&binary, None).unwrap();
        let decompressed = compression::decompress_with_prefix(&compressed).unwrap();
        let ir2 = unpack(&decompressed).unwrap();
        let output = JsonSerializer::serialize(&ir2, false).unwrap();

        assert_json_eq(input, &output);
    }

    #[test]
    fn test_json_simple_object() {
        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
        assert_json_eq(input, &json_through_binary(input));
    }

    #[test]
    fn test_json_swapi_nested_arrays() {
        // SWAPI-like data with nested arrays of primitives
        // Primitive arrays now stored inline
        let input = r#"{"people":[{"name":"Luke","height":"172","films":["film/1","film/2"],"vehicles":[]},{"name":"C-3PO","height":"167","films":["film/1","film/2","film/3"],"vehicles":[]}]}"#;
        let ir = JsonParser::parse(input).unwrap();

        // Verify stele representation (readable mode for string matching)
        let stele_output = stele::serialize_readable(&ir, false).unwrap();

        // Should have @people root key
        assert!(stele_output.starts_with("@people"));
        // Primitive arrays now inline with superscript + ⟦⟧ syntax
        assert!(stele_output.contains("filmsˢ⟦⟧"));
        assert!(stele_output.contains("vehiclesˢ⟦⟧"));

        // Verify binary round trip - primitive arrays come back as JSON arrays
        let output_value = json_value(&json_through_binary(input));
        let people = output_value
            .as_object()
            .unwrap()
            .get("people")
            .unwrap()
            .as_array()
            .unwrap();

        // First person has films as properly reconstructed array
        let luke = &people[0];
        assert_eq!(luke["name"], "Luke");
        assert_eq!(luke["height"], "172");
        let luke_films = luke["films"].as_array().unwrap();
        assert_eq!(luke_films[0], "film/1");
        assert_eq!(luke_films[1], "film/2");
    }

    #[test]
    fn test_json_wrapper_keys() {
        // Test common pagination wrapper keys get unwrapped
        let test_cases = [
            (
                "results",
                r#"{"results":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            ),
            (
                "data",
                r#"{"data":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            ),
            (
                "items",
                r#"{"items":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            ),
            (
                "records",
                r#"{"records":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            ),
        ];

        for (wrapper_key, input) in test_cases {
            let ir = JsonParser::parse(input).unwrap();

            // Root key must be exactly the wrapper key used by this input
            assert_eq!(ir.header.root_key.as_deref(), Some(wrapper_key));

            // Should have 2 rows (unwrapped the array)
            assert_eq!(ir.header.row_count, 2);

            // Round trip should preserve data
            assert_json_eq(input, &json_through_binary(input));
        }
    }

    #[test]
    fn test_json_nested_objects() {
        let input = r#"{"user":{"profile":{"name":"alice","age":30}}}"#;
        assert_json_eq(input, &json_through_binary(input));
    }

    #[test]
    fn test_json_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;
        let ir = JsonParser::parse(input).unwrap();
        assert!(ir.header.has_flag(FLAG_HAS_NULLS));

        assert_json_eq(input, &json_through_binary(input));
    }

    #[test]
    fn test_json_with_arrays() {
        // Primitive arrays must survive the binary round trip as real JSON arrays
        let input = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
        let expected = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;

        assert_json_eq(expected, &json_through_binary(input));
    }

    // ------------------------------------------------------------------
    // Public encode_schema / decode_schema API
    // ------------------------------------------------------------------

    #[test]
    fn test_encode_schema_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        // Validate frame delimiters
        assert!(encoded.starts_with(frame::FRAME_START));
        assert!(encoded.ends_with(frame::FRAME_END));

        // Decode back to JSON and compare as JSON values (order-independent)
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_json_eq(input, &decoded);
    }

    #[test]
    fn test_encode_schema_simple() {
        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
        assert_json_eq(input, &json_through_schema(input, None));
    }

    #[test]
    fn test_encode_schema_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;
        assert_json_eq(input, &json_through_schema(input, None));
    }

    #[test]
    fn test_encode_schema_empty_object() {
        // Smoke test only: whether an empty object is accepted or rejected depends on
        // JsonParser behavior, so this just checks that encoding does not panic and
        // prints the outcome for inspection.
        let input = r#"{}"#;
        let result = encode_schema(input, None);
        println!("Empty object result: {:?}", result);
    }

    #[test]
    fn test_decode_schema_invalid_frame() {
        let result = decode_schema("not_framed_data", false);
        assert!(matches!(result, Err(SchemaError::InvalidFrame(_))));
    }

    #[test]
    fn test_decode_schema_invalid_chars() {
        // Correctly framed, but the payload is outside the display96 alphabet
        let invalid = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
        let result = decode_schema(&invalid, false);
        assert!(matches!(result, Err(SchemaError::InvalidCharacter(_))));
    }

    #[test]
    fn test_visual_wire_format() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        println!("\n=== Visual Wire Format ===");
        println!("Input JSON: {}", input);
        println!("Input length: {} bytes", input.len());
        println!("\nEncoded output: {}", encoded);
        println!(
            "Encoded length: {} chars ({} bytes UTF-8)",
            encoded.chars().count(),
            encoded.len()
        );

        // Calculate compression ratio (input bytes vs encoded UTF-8 bytes)
        let compression_ratio = input.len() as f64 / encoded.len() as f64;
        println!("Compression ratio: {:.2}x", compression_ratio);

        // Decode and verify
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_json_eq(input, &decoded);
        println!("Roundtrip verified ✓\n");
    }

    #[test]
    fn test_compression_comparison() {
        // Informational: prints size ratios; only fails if encoding itself fails.
        let test_cases = [
            r#"{"id":1}"#,
            r#"{"id":1,"name":"alice"}"#,
            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
        ];

        println!("\n=== Compression Comparison ===");
        for (i, input) in test_cases.iter().enumerate() {
            let encoded = encode_schema(input, None).unwrap();
            let ratio = input.len() as f64 / encoded.len() as f64;

            println!(
                "Test case {}: {} bytes → {} bytes ({:.2}x)",
                i + 1,
                input.len(),
                encoded.len(),
                ratio
            );
        }
        println!();
    }

    #[test]
    fn test_encode_schema_with_compression() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;

        // Test each compression algorithm
        for algo in [
            SchemaCompressionAlgo::Brotli,
            SchemaCompressionAlgo::Lz4,
            SchemaCompressionAlgo::Zstd,
        ] {
            let encoded = encode_schema(input, Some(algo)).unwrap();
            let decoded = decode_schema(&encoded, false).unwrap();

            assert_eq!(
                json_value(input),
                json_value(&decoded),
                "Failed for compression algorithm: {:?}",
                algo
            );
        }
    }

    #[test]
    fn test_compression_size_comparison() {
        // Informational: prints sizes per algorithm; only fails if encoding fails.
        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;

        println!("\n=== Compression Size Comparison ===");
        println!("Input JSON: {} bytes", input.len());

        let no_compress = encode_schema(input, None).unwrap();
        println!("No compression: {} bytes", no_compress.len());

        for algo in [
            SchemaCompressionAlgo::Brotli,
            SchemaCompressionAlgo::Lz4,
            SchemaCompressionAlgo::Zstd,
        ] {
            let compressed = encode_schema(input, Some(algo)).unwrap();
            let ratio = no_compress.len() as f64 / compressed.len() as f64;
            println!(
                "{:?}: {} bytes ({:.2}x vs uncompressed)",
                algo,
                compressed.len(),
                ratio
            );
        }
        println!();
    }

    // ------------------------------------------------------------------
    // Nested objects through stele and the schema pipeline
    // ------------------------------------------------------------------

    #[test]
    fn test_nested_object_roundtrip_single_level() {
        let input = r#"{"id":"A1","name":"Jim","grade":{"math":60,"physics":66,"chemistry":61}}"#;

        // JSON → IR → stele (readable for string matching)
        let ir = JsonParser::parse(input).unwrap();
        let stele = stele::serialize_readable(&ir, false).unwrap();

        // Verify flattened field names with ჻ and superscript types
        assert!(stele.contains("grade჻mathⁱ"));
        assert!(stele.contains("grade჻physicsⁱ"));
        assert!(stele.contains("grade჻chemistryⁱ"));

        // stele → IR → JSON (using tokenized format for roundtrip)
        assert_json_eq(input, &json_through_stele(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_deep() {
        let input = r#"{"a":{"b":{"c":{"d":42}}}}"#;

        let ir = JsonParser::parse(input).unwrap();
        let stele = stele::serialize_readable(&ir, false).unwrap();

        // Verify deep nesting with ჻ and superscript type
        assert!(stele.contains("a჻b჻c჻dⁱ"));

        // Roundtrip with tokenized format
        assert_json_eq(input, &json_through_stele(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_array_of_objects() {
        let input = r#"{"students":[{"id":"A1","name":"Jim","grade":{"math":60,"physics":66}},{"id":"B2","name":"Sara","grade":{"math":85,"physics":90}}]}"#;

        let ir = JsonParser::parse(input).unwrap();
        let stele = stele::serialize_readable(&ir, false).unwrap();

        // Verify root key and flattened nested fields with superscript types
        assert!(stele.starts_with("@students"));
        assert!(stele.contains("grade჻mathⁱ"));
        assert!(stele.contains("grade჻physicsⁱ"));

        // Roundtrip with tokenized format
        assert_json_eq(input, &json_through_stele(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_mixed_with_arrays() {
        // Primitive arrays now stored inline
        let input = r#"{"person":{"name":"Alice","tags":["admin","user"],"address":{"city":"Boston","zip":"02101"}}}"#;

        let ir = JsonParser::parse(input).unwrap();
        let stele = stele::serialize_readable(&ir, false).unwrap();

        // Verify both object nesting and inline primitive arrays with superscript types
        assert!(stele.contains("person჻nameˢ"));
        // Primitive arrays now inline with superscript + ⟦⟧ syntax
        assert!(stele.contains("person჻tagsˢ⟦⟧"));
        assert!(stele.contains("person჻address჻cityˢ"));
        assert!(stele.contains("person჻address჻zipˢ"));

        // Roundtrip with tokenized format; arrays are properly reconstructed
        let expected = r#"{"person":{"address":{"city":"Boston","zip":"02101"},"name":"Alice","tags":["admin","user"]}}"#;
        assert_json_eq(expected, &json_through_stele(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_schema_encode() {
        let input = r#"{"data":{"user":{"profile":{"name":"alice","age":30}}}}"#;

        // Full schema pipeline: JSON → IR → binary → display96 → framed, and back
        assert_json_eq(input, &json_through_schema(input, None));
    }
}