base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod fiche;
6pub mod frame;
7pub mod parsers;
8pub mod serializers;
9pub mod types;
10
11#[cfg(test)]
12mod edge_cases;
13
14// Re-export key types for convenience
15pub use binary_packer::pack;
16pub use binary_unpacker::unpack;
17pub use compression::SchemaCompressionAlgo;
18pub use frame::{decode_framed, encode_framed};
19pub use parsers::{InputParser, JsonParser};
20pub use serializers::{JsonSerializer, OutputSerializer};
21pub use types::{
22    FieldDef, FieldType, IntermediateRepresentation, SchemaError, SchemaHeader, SchemaValue,
23};
24
25// Re-export fiche functions for library users
26#[allow(unused_imports)]
27pub use fiche::{parse as parse_fiche, serialize as serialize_fiche};
28
29/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
30///
31/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
32/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
33/// alphabet of box-drawing and geometric shapes.
34///
35/// # Arguments
36///
37/// * `json` - JSON string to encode (must be object or array of objects)
38/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
39///
40/// # Returns
41///
42/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
43///
44/// # Errors
45///
46/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
47/// * `SchemaError::Compression` - Compression failure
48///
49/// # Example
50///
51/// ```ignore
52/// use base_d::{encode_schema, SchemaCompressionAlgo};
53///
54/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
55///
56/// // Without compression
57/// let encoded = encode_schema(json, None)?;
58/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
59///
60/// // With brotli compression
61/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
62/// ```
63///
64/// # See Also
65///
66/// * [`decode_schema`] - Decode schema format back to JSON
67/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
68pub fn encode_schema(
69    json: &str,
70    compress: Option<SchemaCompressionAlgo>,
71) -> Result<String, SchemaError> {
72    use parsers::{InputParser, JsonParser};
73
74    let ir = JsonParser::parse(json)?;
75    let binary = pack(&ir);
76    let compressed = compression::compress_with_prefix(&binary, compress)?;
77    Ok(frame::encode_framed(&compressed))
78}
79
80/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
81///
82/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
83/// display-safe wire format. Automatically detects and handles compression.
84///
85/// # Arguments
86///
87/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
88/// * `pretty` - Pretty-print JSON output with indentation
89///
90/// # Returns
91///
92/// Returns the decoded JSON string (minified or pretty-printed)
93///
94/// # Errors
95///
96/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
97/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
98/// * `SchemaError::Decompression` - Decompression failure
99/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
100/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
101///
102/// # Example
103///
104/// ```ignore
105/// use base_d::decode_schema;
106///
107/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
108///
109/// // Minified output
110/// let json = decode_schema(encoded, false)?;
111/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
112///
113/// // Pretty-printed output
114/// let pretty = decode_schema(encoded, true)?;
115/// println!("{}", pretty);
116/// // {
117/// //   "users": [
118/// //     {"id": 1, "name": "alice"}
119/// //   ]
120/// // }
121/// ```
122///
123/// # See Also
124///
125/// * [`encode_schema`] - Encode JSON to schema format
126/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
127pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
128    use serializers::{JsonSerializer, OutputSerializer};
129
130    let compressed = frame::decode_framed(encoded)?;
131    let binary = compression::decompress_with_prefix(&compressed)?;
132    let ir = unpack(&binary)?;
133    JsonSerializer::serialize(&ir, pretty)
134}
135
136/// Encode JSON to fiche format: JSON → IR → fiche
137///
138/// Transforms JSON into a model-readable structured format using Unicode delimiters.
139/// Unlike carrier98 (opaque binary), fiche is designed for models to parse directly.
140///
141/// # Format
142///
143/// ```text
144/// @{root}┃{field}:{type}┃{field}:{type}...
145/// ◉{value}┃{value}┃{value}...
146/// ```
147///
148/// # Example
149///
150/// ```ignore
151/// use base_d::encode_fiche;
152///
153/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
154/// let fiche = encode_fiche(json)?;
155/// // @users┃id:int┃name:str
156/// // ◉1┃alice
157/// ```
158pub fn encode_fiche(json: &str, minify: bool) -> Result<String, SchemaError> {
159    encode_fiche_with_options(json, minify, true, true)
160}
161
162pub fn encode_fiche_minified(json: &str) -> Result<String, SchemaError> {
163    encode_fiche_with_options(json, true, true, true)
164}
165
166/// Encode JSON to fiche without tokenization (human-readable field names)
167pub fn encode_fiche_readable(json: &str, minify: bool) -> Result<String, SchemaError> {
168    encode_fiche_with_options(json, minify, false, false)
169}
170
171/// Encode JSON to fiche with field tokenization only (no value dictionary)
172pub fn encode_fiche_light(json: &str, minify: bool) -> Result<String, SchemaError> {
173    encode_fiche_with_options(json, minify, true, false)
174}
175
176fn encode_fiche_with_options(
177    json: &str,
178    minify: bool,
179    tokenize_fields: bool,
180    tokenize_values: bool,
181) -> Result<String, SchemaError> {
182    use parsers::{InputParser, JsonParser};
183
184    let ir = JsonParser::parse(json)?;
185    match (tokenize_fields, tokenize_values) {
186        (true, true) => fiche::serialize(&ir, minify),
187        (true, false) => fiche::serialize_light(&ir, minify),
188        (false, false) => fiche::serialize_readable(&ir, minify),
189        (false, true) => {
190            // Invalid: can't tokenize values without tokenizing fields
191            fiche::serialize_readable(&ir, minify)
192        }
193    }
194}
195
196/// Decode fiche format to JSON: fiche → IR → JSON
197///
198/// Reverses the fiche encoding to reconstruct JSON from the model-readable format.
199///
200/// # Example
201///
202/// ```ignore
203/// use base_d::decode_fiche;
204///
205/// let fiche = "@users┃id:int┃name:str\n◉1┃alice";
206/// let json = decode_fiche(fiche, false)?;
207/// // {"users":[{"id":1,"name":"alice"}]}
208/// ```
209pub fn decode_fiche(fiche_input: &str, pretty: bool) -> Result<String, SchemaError> {
210    use serializers::{JsonSerializer, OutputSerializer};
211
212    let ir = fiche::parse(fiche_input)?;
213    JsonSerializer::serialize(&ir, pretty)
214}
215
216#[cfg(test)]
217mod integration_tests {
218    use super::*;
219    use crate::encoders::algorithms::schema::types::{
220        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
221        SchemaHeader, SchemaValue,
222    };
223    use parsers::{InputParser, JsonParser};
224    use serializers::{JsonSerializer, OutputSerializer};
225
226    #[test]
227    fn test_round_trip_simple() {
228        let fields = vec![
229            FieldDef::new("id", FieldType::U64),
230            FieldDef::new("name", FieldType::String),
231        ];
232        let header = SchemaHeader::new(2, fields);
233
234        let values = vec![
235            SchemaValue::U64(1),
236            SchemaValue::String("Alice".to_string()),
237            SchemaValue::U64(2),
238            SchemaValue::String("Bob".to_string()),
239        ];
240
241        let original = IntermediateRepresentation::new(header, values).unwrap();
242
243        // Pack and unpack
244        let packed = pack(&original);
245        let unpacked = unpack(&packed).unwrap();
246
247        assert_eq!(original, unpacked);
248    }
249
250    #[test]
251    fn test_round_trip_all_types() {
252        let fields = vec![
253            FieldDef::new("u64_field", FieldType::U64),
254            FieldDef::new("i64_field", FieldType::I64),
255            FieldDef::new("f64_field", FieldType::F64),
256            FieldDef::new("string_field", FieldType::String),
257            FieldDef::new("bool_field", FieldType::Bool),
258        ];
259        let header = SchemaHeader::new(1, fields);
260
261        let values = vec![
262            SchemaValue::U64(42),
263            SchemaValue::I64(-42),
264            SchemaValue::F64(std::f64::consts::PI),
265            SchemaValue::String("test".to_string()),
266            SchemaValue::Bool(true),
267        ];
268
269        let original = IntermediateRepresentation::new(header, values).unwrap();
270
271        let packed = pack(&original);
272        let unpacked = unpack(&packed).unwrap();
273
274        assert_eq!(original, unpacked);
275    }
276
277    #[test]
278    fn test_round_trip_with_root_key() {
279        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
280        header.root_key = Some("users".to_string());
281        header.set_flag(FLAG_HAS_ROOT_KEY);
282
283        let values = vec![SchemaValue::U64(42)];
284        let original = IntermediateRepresentation::new(header, values).unwrap();
285
286        let packed = pack(&original);
287        let unpacked = unpack(&packed).unwrap();
288
289        assert_eq!(original, unpacked);
290    }
291
292    #[test]
293    fn test_round_trip_with_nulls() {
294        let mut header = SchemaHeader::new(
295            2,
296            vec![
297                FieldDef::new("id", FieldType::U64),
298                FieldDef::new("name", FieldType::String),
299            ],
300        );
301
302        // Mark second value as null (row 0, field 1)
303        let total_values: usize = 2 * 2; // 2 rows * 2 fields = 4 values
304        let bitmap_bytes = total_values.div_ceil(8); // 1 byte
305        let mut null_bitmap = vec![0u8; bitmap_bytes];
306        null_bitmap[0] |= 1 << 1; // Set bit 1 (second value)
307
308        header.null_bitmap = Some(null_bitmap);
309        header.set_flag(FLAG_HAS_NULLS);
310
311        let values = vec![
312            SchemaValue::U64(1),
313            SchemaValue::Null, // This is marked as null in bitmap
314            SchemaValue::U64(2),
315            SchemaValue::String("Bob".to_string()),
316        ];
317
318        let original = IntermediateRepresentation::new(header, values).unwrap();
319
320        let packed = pack(&original);
321        let unpacked = unpack(&packed).unwrap();
322
323        assert_eq!(original, unpacked);
324    }
325
326    #[test]
327    fn test_round_trip_array() {
328        let fields = vec![FieldDef::new(
329            "tags",
330            FieldType::Array(Box::new(FieldType::U64)),
331        )];
332        let header = SchemaHeader::new(1, fields);
333
334        let values = vec![SchemaValue::Array(vec![
335            SchemaValue::U64(1),
336            SchemaValue::U64(2),
337            SchemaValue::U64(3),
338        ])];
339
340        let original = IntermediateRepresentation::new(header, values).unwrap();
341
342        let packed = pack(&original);
343        let unpacked = unpack(&packed).unwrap();
344
345        assert_eq!(original, unpacked);
346    }
347
348    #[test]
349    fn test_round_trip_large_values() {
350        let fields = vec![
351            FieldDef::new("large_u64", FieldType::U64),
352            FieldDef::new("large_i64", FieldType::I64),
353        ];
354        let header = SchemaHeader::new(1, fields);
355
356        let values = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];
357
358        let original = IntermediateRepresentation::new(header, values).unwrap();
359
360        let packed = pack(&original);
361        let unpacked = unpack(&packed).unwrap();
362
363        assert_eq!(original, unpacked);
364    }
365
366    #[test]
367    fn test_round_trip_empty_string() {
368        let fields = vec![FieldDef::new("name", FieldType::String)];
369        let header = SchemaHeader::new(1, fields);
370
371        let values = vec![SchemaValue::String("".to_string())];
372
373        let original = IntermediateRepresentation::new(header, values).unwrap();
374
375        let packed = pack(&original);
376        let unpacked = unpack(&packed).unwrap();
377
378        assert_eq!(original, unpacked);
379    }
380
381    #[test]
382    fn test_round_trip_multiple_rows() {
383        let fields = vec![
384            FieldDef::new("id", FieldType::U64),
385            FieldDef::new("score", FieldType::F64),
386            FieldDef::new("active", FieldType::Bool),
387        ];
388        let header = SchemaHeader::new(3, fields);
389
390        let values = vec![
391            SchemaValue::U64(1),
392            SchemaValue::F64(95.5),
393            SchemaValue::Bool(true),
394            SchemaValue::U64(2),
395            SchemaValue::F64(87.3),
396            SchemaValue::Bool(false),
397            SchemaValue::U64(3),
398            SchemaValue::F64(92.1),
399            SchemaValue::Bool(true),
400        ];
401
402        let original = IntermediateRepresentation::new(header, values).unwrap();
403
404        let packed = pack(&original);
405        let unpacked = unpack(&packed).unwrap();
406
407        assert_eq!(original, unpacked);
408    }
409
410    #[test]
411    fn test_invalid_data() {
412        // Empty data
413        let result = unpack(&[]);
414        assert!(matches!(
415            result,
416            Err(SchemaError::UnexpectedEndOfData { .. })
417        ));
418
419        // Truncated data
420        let result = unpack(&[0, 1, 2]);
421        assert!(result.is_err());
422    }
423
424    #[test]
425    fn test_json_full_roundtrip() {
426        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
427        let ir = JsonParser::parse(input).unwrap();
428        let binary = pack(&ir);
429        let compressed = compression::compress_with_prefix(&binary, None).unwrap();
430        let decompressed = compression::decompress_with_prefix(&compressed).unwrap();
431        let ir2 = unpack(&decompressed).unwrap();
432        let output = JsonSerializer::serialize(&ir2, false).unwrap();
433
434        // Parse both as serde_json::Value and compare (order-independent)
435        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
436        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
437        assert_eq!(input_value, output_value);
438    }
439
440    #[test]
441    fn test_json_simple_object() {
442        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
443        let ir = JsonParser::parse(input).unwrap();
444        let binary = pack(&ir);
445        let ir2 = unpack(&binary).unwrap();
446        let output = JsonSerializer::serialize(&ir2, false).unwrap();
447
448        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
449        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
450        assert_eq!(input_value, output_value);
451    }
452
453    #[test]
454    fn test_json_swapi_nested_arrays() {
455        // SWAPI-like data with nested arrays of primitives
456        // Primitive arrays now stored inline
457        let input = r#"{"people":[{"name":"Luke","height":"172","films":["film/1","film/2"],"vehicles":[]},{"name":"C-3PO","height":"167","films":["film/1","film/2","film/3"],"vehicles":[]}]}"#;
458        let ir = JsonParser::parse(input).unwrap();
459
460        // Verify fiche representation (readable mode for string matching)
461        let fiche_output = fiche::serialize_readable(&ir, false).unwrap();
462
463        // Should have @people root key
464        assert!(fiche_output.starts_with("@people"));
465        // Primitive arrays now inline with superscript + ⟦⟧ syntax
466        assert!(fiche_output.contains("filmsˢ⟦⟧"));
467        assert!(fiche_output.contains("vehiclesˢ⟦⟧"));
468
469        // Verify round trip - arrays become indexed objects
470        let binary = pack(&ir);
471        let ir2 = unpack(&binary).unwrap();
472        let output = JsonSerializer::serialize(&ir2, false).unwrap();
473
474        // Parse output and verify structure
475        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
476        let people = output_value
477            .as_object()
478            .unwrap()
479            .get("people")
480            .unwrap()
481            .as_array()
482            .unwrap();
483
484        // First person has films as properly reconstructed array
485        let luke = &people[0];
486        assert_eq!(luke["name"], "Luke");
487        assert_eq!(luke["height"], "172");
488        let luke_films = luke["films"].as_array().unwrap();
489        assert_eq!(luke_films[0], "film/1");
490        assert_eq!(luke_films[1], "film/2");
491    }
492
493    #[test]
494    fn test_json_wrapper_keys() {
495        // Test common pagination wrapper keys get unwrapped
496        let test_cases = vec![
497            r#"{"results":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
498            r#"{"data":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
499            r#"{"items":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
500            r#"{"records":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
501        ];
502
503        for input in test_cases {
504            let ir = JsonParser::parse(input).unwrap();
505
506            // Should have root key from wrapper
507            assert!(ir.header.root_key.is_some());
508            let root = ir.header.root_key.as_ref().unwrap();
509            assert!(root == "results" || root == "data" || root == "items" || root == "records");
510
511            // Should have 2 rows (unwrapped the array)
512            assert_eq!(ir.header.row_count, 2);
513
514            // Round trip should preserve data
515            let binary = pack(&ir);
516            let ir2 = unpack(&binary).unwrap();
517            let output = JsonSerializer::serialize(&ir2, false).unwrap();
518
519            let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
520            let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
521            assert_eq!(input_value, output_value);
522        }
523    }
524
525    #[test]
526    fn test_json_nested_objects() {
527        let input = r#"{"user":{"profile":{"name":"alice","age":30}}}"#;
528        let ir = JsonParser::parse(input).unwrap();
529        let binary = pack(&ir);
530        let ir2 = unpack(&binary).unwrap();
531        let output = JsonSerializer::serialize(&ir2, false).unwrap();
532
533        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
534        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
535        assert_eq!(input_value, output_value);
536    }
537
538    #[test]
539    fn test_json_with_nulls() {
540        let input = r#"{"name":"alice","age":null,"active":true}"#;
541        let ir = JsonParser::parse(input).unwrap();
542        assert!(ir.header.has_flag(FLAG_HAS_NULLS));
543
544        let binary = pack(&ir);
545        let ir2 = unpack(&binary).unwrap();
546        let output = JsonSerializer::serialize(&ir2, false).unwrap();
547
548        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
549        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
550        assert_eq!(input_value, output_value);
551    }
552
553    #[test]
554    fn test_json_with_arrays() {
555        // Arrays now flatten to indexed objects
556        let input = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
557        let ir = JsonParser::parse(input).unwrap();
558        let binary = pack(&ir);
559        let ir2 = unpack(&binary).unwrap();
560        let output = JsonSerializer::serialize(&ir2, false).unwrap();
561
562        // Expected: arrays are properly reconstructed as arrays
563        let expected = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
564        let expected_value: serde_json::Value = serde_json::from_str(expected).unwrap();
565        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
566        assert_eq!(expected_value, output_value);
567    }
568
569    #[test]
570    fn test_encode_schema_roundtrip() {
571        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
572        let encoded = encode_schema(input, None).unwrap();
573
574        // Validate frame delimiters
575        assert!(encoded.starts_with(frame::FRAME_START));
576        assert!(encoded.ends_with(frame::FRAME_END));
577
578        // Decode back to JSON
579        let decoded = decode_schema(&encoded, false).unwrap();
580
581        // Compare as JSON values (order-independent)
582        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
583        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
584        assert_eq!(input_value, output_value);
585    }
586
587    #[test]
588    fn test_encode_schema_simple() {
589        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
590        let encoded = encode_schema(input, None).unwrap();
591        let decoded = decode_schema(&encoded, false).unwrap();
592
593        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
594        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
595        assert_eq!(input_value, output_value);
596    }
597
598    #[test]
599    fn test_encode_schema_with_nulls() {
600        let input = r#"{"name":"alice","age":null,"active":true}"#;
601        let encoded = encode_schema(input, None).unwrap();
602        let decoded = decode_schema(&encoded, false).unwrap();
603
604        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
605        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
606        assert_eq!(input_value, output_value);
607    }
608
609    #[test]
610    fn test_encode_schema_empty_object() {
611        let input = r#"{}"#;
612        let result = encode_schema(input, None);
613        // Empty objects should fail or handle gracefully
614        // This depends on JsonParser behavior
615        println!("Empty object result: {:?}", result);
616    }
617
618    #[test]
619    fn test_decode_schema_invalid_frame() {
620        let invalid = "not_framed_data";
621        let result = decode_schema(invalid, false);
622        assert!(matches!(result, Err(SchemaError::InvalidFrame(_))));
623    }
624
625    #[test]
626    fn test_decode_schema_invalid_chars() {
627        let invalid = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
628        let result = decode_schema(&invalid, false);
629        assert!(matches!(result, Err(SchemaError::InvalidCharacter(_))));
630    }
631
632    #[test]
633    fn test_visual_wire_format() {
634        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
635        let encoded = encode_schema(input, None).unwrap();
636
637        println!("\n=== Visual Wire Format ===");
638        println!("Input JSON: {}", input);
639        println!("Input length: {} bytes", input.len());
640        println!("\nEncoded output: {}", encoded);
641        println!(
642            "Encoded length: {} chars ({} bytes UTF-8)",
643            encoded.chars().count(),
644            encoded.len()
645        );
646
647        // Calculate compression ratio
648        let compression_ratio = input.len() as f64 / encoded.len() as f64;
649        println!("Compression ratio: {:.2}x", compression_ratio);
650
651        // Decode and verify
652        let decoded = decode_schema(&encoded, false).unwrap();
653        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
654        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
655        assert_eq!(input_value, output_value);
656        println!("Roundtrip verified ✓\n");
657    }
658
659    #[test]
660    fn test_compression_comparison() {
661        let test_cases = [
662            r#"{"id":1}"#,
663            r#"{"id":1,"name":"alice"}"#,
664            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
665            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
666        ];
667
668        println!("\n=== Compression Comparison ===");
669        for (i, input) in test_cases.iter().enumerate() {
670            let encoded = encode_schema(input, None).unwrap();
671            let ratio = input.len() as f64 / encoded.len() as f64;
672
673            println!(
674                "Test case {}: {} bytes → {} bytes ({:.2}x)",
675                i + 1,
676                input.len(),
677                encoded.len(),
678                ratio
679            );
680        }
681        println!();
682    }
683
684    #[test]
685    fn test_encode_schema_with_compression() {
686        use super::SchemaCompressionAlgo;
687
688        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;
689
690        // Test each compression algorithm
691        for algo in [
692            SchemaCompressionAlgo::Brotli,
693            SchemaCompressionAlgo::Lz4,
694            SchemaCompressionAlgo::Zstd,
695        ] {
696            let encoded = encode_schema(input, Some(algo)).unwrap();
697            let decoded = decode_schema(&encoded, false).unwrap();
698
699            let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
700            let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
701            assert_eq!(
702                input_value, output_value,
703                "Failed for compression algorithm: {:?}",
704                algo
705            );
706        }
707    }
708
709    #[test]
710    fn test_compression_size_comparison() {
711        use super::SchemaCompressionAlgo;
712
713        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;
714
715        println!("\n=== Compression Size Comparison ===");
716        println!("Input JSON: {} bytes", input.len());
717
718        let no_compress = encode_schema(input, None).unwrap();
719        println!("No compression: {} bytes", no_compress.len());
720
721        for algo in [
722            SchemaCompressionAlgo::Brotli,
723            SchemaCompressionAlgo::Lz4,
724            SchemaCompressionAlgo::Zstd,
725        ] {
726            let compressed = encode_schema(input, Some(algo)).unwrap();
727            let ratio = no_compress.len() as f64 / compressed.len() as f64;
728            println!(
729                "{:?}: {} bytes ({:.2}x vs uncompressed)",
730                algo,
731                compressed.len(),
732                ratio
733            );
734        }
735        println!();
736    }
737
738    #[test]
739    fn test_nested_object_roundtrip_single_level() {
740        let input = r#"{"id":"A1","name":"Jim","grade":{"math":60,"physics":66,"chemistry":61}}"#;
741
742        // JSON → IR → fiche (readable for string matching)
743        let ir = JsonParser::parse(input).unwrap();
744        let fiche = fiche::serialize_readable(&ir, false).unwrap();
745
746        // Verify flattened field names with ჻ and superscript types
747        assert!(fiche.contains("grade჻mathⁱ"));
748        assert!(fiche.contains("grade჻physicsⁱ"));
749        assert!(fiche.contains("grade჻chemistryⁱ"));
750
751        // fiche → IR → JSON (using tokenized format for roundtrip)
752        let tokenized = fiche::serialize(&ir, false).unwrap();
753        let ir2 = fiche::parse(&tokenized).unwrap();
754        let output = JsonSerializer::serialize(&ir2, false).unwrap();
755
756        // Compare JSON
757        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
758        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
759        assert_eq!(input_value, output_value);
760    }
761
762    #[test]
763    fn test_nested_object_roundtrip_deep() {
764        let input = r#"{"a":{"b":{"c":{"d":42}}}}"#;
765
766        let ir = JsonParser::parse(input).unwrap();
767        let fiche = fiche::serialize_readable(&ir, false).unwrap();
768
769        // Verify deep nesting with ჻ and superscript type
770        assert!(fiche.contains("a჻b჻c჻dⁱ"));
771
772        // Roundtrip with tokenized format
773        let tokenized = fiche::serialize(&ir, false).unwrap();
774        let ir2 = fiche::parse(&tokenized).unwrap();
775        let output = JsonSerializer::serialize(&ir2, false).unwrap();
776
777        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
778        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
779        assert_eq!(input_value, output_value);
780    }
781
782    #[test]
783    fn test_nested_object_roundtrip_array_of_objects() {
784        let input = r#"{"students":[{"id":"A1","name":"Jim","grade":{"math":60,"physics":66}},{"id":"B2","name":"Sara","grade":{"math":85,"physics":90}}]}"#;
785
786        let ir = JsonParser::parse(input).unwrap();
787        let fiche = fiche::serialize_readable(&ir, false).unwrap();
788
789        // Verify root key and flattened nested fields with superscript types
790        assert!(fiche.starts_with("@students"));
791        assert!(fiche.contains("grade჻mathⁱ"));
792        assert!(fiche.contains("grade჻physicsⁱ"));
793
794        // Roundtrip with tokenized format
795        let tokenized = fiche::serialize(&ir, false).unwrap();
796        let ir2 = fiche::parse(&tokenized).unwrap();
797        let output = JsonSerializer::serialize(&ir2, false).unwrap();
798
799        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
800        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
801        assert_eq!(input_value, output_value);
802    }
803
804    #[test]
805    fn test_nested_object_roundtrip_mixed_with_arrays() {
806        // Primitive arrays now stored inline
807        let input = r#"{"person":{"name":"Alice","tags":["admin","user"],"address":{"city":"Boston","zip":"02101"}}}"#;
808
809        let ir = JsonParser::parse(input).unwrap();
810        let fiche = fiche::serialize_readable(&ir, false).unwrap();
811
812        // Verify both object nesting and inline primitive arrays with superscript types
813        assert!(fiche.contains("person჻nameˢ"));
814        // Primitive arrays now inline with superscript + ⟦⟧ syntax
815        assert!(fiche.contains("person჻tagsˢ⟦⟧"));
816        assert!(fiche.contains("person჻address჻cityˢ"));
817        assert!(fiche.contains("person჻address჻zipˢ"));
818
819        // Roundtrip with tokenized format
820        let tokenized = fiche::serialize(&ir, false).unwrap();
821        let ir2 = fiche::parse(&tokenized).unwrap();
822        let output = JsonSerializer::serialize(&ir2, false).unwrap();
823
824        // Arrays are properly reconstructed
825        let expected = r#"{"person":{"address":{"city":"Boston","zip":"02101"},"name":"Alice","tags":["admin","user"]}}"#;
826        let expected_value: serde_json::Value = serde_json::from_str(expected).unwrap();
827        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
828        assert_eq!(expected_value, output_value);
829    }
830
831    #[test]
832    fn test_nested_object_roundtrip_schema_encode() {
833        let input = r#"{"data":{"user":{"profile":{"name":"alice","age":30}}}}"#;
834
835        // Full schema pipeline: JSON → IR → binary → display96 → framed
836        let encoded = encode_schema(input, None).unwrap();
837        let decoded = decode_schema(&encoded, false).unwrap();
838
839        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
840        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
841        assert_eq!(input_value, output_value);
842    }
843}