base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod fiche;
6pub mod frame;
7pub mod parsers;
8pub mod serializers;
9pub mod types;
10
11#[cfg(test)]
12mod edge_cases;
13
14// Re-export key types for convenience
15pub use binary_packer::pack;
16pub use binary_unpacker::unpack;
17pub use compression::SchemaCompressionAlgo;
18pub use frame::{decode_framed, encode_framed};
19pub use parsers::{InputParser, JsonParser};
20pub use serializers::{JsonSerializer, OutputSerializer};
21pub use types::{
22    FieldDef, FieldType, IntermediateRepresentation, SchemaError, SchemaHeader, SchemaValue,
23};
24
25// Re-export fiche functions for library users
26#[allow(unused_imports)]
27pub use fiche::{parse as parse_fiche, serialize as serialize_fiche};
28
29/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
30///
31/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
32/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
33/// alphabet of box-drawing and geometric shapes.
34///
35/// # Arguments
36///
37/// * `json` - JSON string to encode (must be object or array of objects)
38/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
39///
40/// # Returns
41///
42/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
43///
44/// # Errors
45///
46/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
47/// * `SchemaError::Compression` - Compression failure
48///
49/// # Example
50///
51/// ```ignore
52/// use base_d::{encode_schema, SchemaCompressionAlgo};
53///
54/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
55///
56/// // Without compression
57/// let encoded = encode_schema(json, None)?;
58/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
59///
60/// // With brotli compression
61/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
62/// ```
63///
64/// # See Also
65///
66/// * [`decode_schema`] - Decode schema format back to JSON
67/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
68pub fn encode_schema(
69    json: &str,
70    compress: Option<SchemaCompressionAlgo>,
71) -> Result<String, SchemaError> {
72    use parsers::{InputParser, JsonParser};
73
74    let ir = JsonParser::parse(json)?;
75    let binary = pack(&ir);
76    let compressed = compression::compress_with_prefix(&binary, compress)?;
77    Ok(frame::encode_framed(&compressed))
78}
79
80/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
81///
82/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
83/// display-safe wire format. Automatically detects and handles compression.
84///
85/// # Arguments
86///
87/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
88/// * `pretty` - Pretty-print JSON output with indentation
89///
90/// # Returns
91///
92/// Returns the decoded JSON string (minified or pretty-printed)
93///
94/// # Errors
95///
96/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
97/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
98/// * `SchemaError::Decompression` - Decompression failure
99/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
100/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
101///
102/// # Example
103///
104/// ```ignore
105/// use base_d::decode_schema;
106///
107/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
108///
109/// // Minified output
110/// let json = decode_schema(encoded, false)?;
111/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
112///
113/// // Pretty-printed output
114/// let pretty = decode_schema(encoded, true)?;
115/// println!("{}", pretty);
116/// // {
117/// //   "users": [
118/// //     {"id": 1, "name": "alice"}
119/// //   ]
120/// // }
121/// ```
122///
123/// # See Also
124///
125/// * [`encode_schema`] - Encode JSON to schema format
126/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
127pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
128    use serializers::{JsonSerializer, OutputSerializer};
129
130    let compressed = frame::decode_framed(encoded)?;
131    let binary = compression::decompress_with_prefix(&compressed)?;
132    let ir = unpack(&binary)?;
133    JsonSerializer::serialize(&ir, pretty)
134}
135
136/// Encode JSON to fiche format: JSON → IR → fiche
137///
138/// Transforms JSON into a model-readable structured format using Unicode delimiters.
139/// Unlike carrier98 (opaque binary), fiche is designed for models to parse directly.
140///
141/// # Format
142///
143/// ```text
144/// @{root}┃{field}:{type}┃{field}:{type}...
145/// ◉{value}┃{value}┃{value}...
146/// ```
147///
148/// # Example
149///
150/// ```ignore
151/// use base_d::encode_fiche;
152///
153/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
154/// let fiche = encode_fiche(json)?;
155/// // @users┃id:int┃name:str
156/// // ◉1┃alice
157/// ```
158pub fn encode_fiche(json: &str, minify: bool) -> Result<String, SchemaError> {
159    encode_fiche_with_options(json, minify, true, true)
160}
161
162pub fn encode_fiche_minified(json: &str) -> Result<String, SchemaError> {
163    encode_fiche_with_options(json, true, true, true)
164}
165
166/// Encode JSON to fiche without tokenization (human-readable field names)
167pub fn encode_fiche_readable(json: &str, minify: bool) -> Result<String, SchemaError> {
168    encode_fiche_with_options(json, minify, false, false)
169}
170
171/// Encode JSON to fiche with field tokenization only (no value dictionary)
172pub fn encode_fiche_light(json: &str, minify: bool) -> Result<String, SchemaError> {
173    encode_fiche_with_options(json, minify, true, false)
174}
175
176/// Encode JSON to fiche path mode (one line per leaf value)
177pub fn encode_fiche_path(json: &str) -> Result<String, SchemaError> {
178    fiche::serialize_path_mode(json)
179}
180
181/// Decode fiche path mode to JSON
182pub fn decode_fiche_path(path_input: &str) -> Result<String, SchemaError> {
183    fiche::parse_path_mode(path_input)
184}
185
186fn encode_fiche_with_options(
187    json: &str,
188    minify: bool,
189    tokenize_fields: bool,
190    tokenize_values: bool,
191) -> Result<String, SchemaError> {
192    use parsers::{InputParser, JsonParser};
193
194    let ir = JsonParser::parse(json)?;
195    match (tokenize_fields, tokenize_values) {
196        (true, true) => fiche::serialize(&ir, minify),
197        (true, false) => fiche::serialize_light(&ir, minify),
198        (false, false) => fiche::serialize_readable(&ir, minify),
199        (false, true) => {
200            // Invalid: can't tokenize values without tokenizing fields
201            fiche::serialize_readable(&ir, minify)
202        }
203    }
204}
205
206/// Decode fiche format to JSON: fiche → IR → JSON
207///
208/// Reverses the fiche encoding to reconstruct JSON from the model-readable format.
209///
210/// # Example
211///
212/// ```ignore
213/// use base_d::decode_fiche;
214///
215/// let fiche = "@users┃id:int┃name:str\n◉1┃alice";
216/// let json = decode_fiche(fiche, false)?;
217/// // {"users":[{"id":1,"name":"alice"}]}
218/// ```
219pub fn decode_fiche(fiche_input: &str, pretty: bool) -> Result<String, SchemaError> {
220    use serializers::{JsonSerializer, OutputSerializer};
221
222    let ir = fiche::parse(fiche_input)?;
223    JsonSerializer::serialize(&ir, pretty)
224}
225
#[cfg(test)]
mod integration_tests {
    use super::*;
    use crate::encoders::algorithms::schema::types::{
        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
        SchemaHeader, SchemaValue,
    };
    use parsers::{InputParser, JsonParser};
    use serializers::{JsonSerializer, OutputSerializer};

    // ------------------------------------------------------------------
    // Shared helpers
    //
    // Each helper is `#[track_caller]` so a failing `unwrap`/`assert_eq!`
    // is reported at the test that called it rather than inside the helper.
    // ------------------------------------------------------------------

    /// Packs `ir` to binary, unpacks it again, and asserts the IR is unchanged.
    #[track_caller]
    fn assert_pack_roundtrip(ir: &IntermediateRepresentation) {
        let packed = pack(ir);
        let unpacked = unpack(&packed).unwrap();
        assert_eq!(ir, &unpacked);
    }

    /// Sends `ir` through pack → unpack and serializes the result as non-pretty JSON.
    #[track_caller]
    fn json_via_binary(ir: &IntermediateRepresentation) -> String {
        let packed = pack(ir);
        let unpacked = unpack(&packed).unwrap();
        JsonSerializer::serialize(&unpacked, false).unwrap()
    }

    /// Sends `ir` through the tokenized fiche serializer and the fiche parser, then
    /// serializes the result as non-pretty JSON.
    #[track_caller]
    fn json_via_fiche(ir: &IntermediateRepresentation) -> String {
        let tokenized = fiche::serialize(ir, false).unwrap();
        let reparsed = fiche::parse(&tokenized).unwrap();
        JsonSerializer::serialize(&reparsed, false).unwrap()
    }

    /// Runs `encode_schema` (no compression) followed by `decode_schema` (non-pretty).
    #[track_caller]
    fn json_via_schema(input: &str) -> String {
        let encoded = encode_schema(input, None).unwrap();
        decode_schema(&encoded, false).unwrap()
    }

    /// Parses both documents as `serde_json::Value` and asserts they are equal, so the
    /// comparison does not depend on the textual layout of either side.
    #[track_caller]
    fn assert_same_json(expected: &str, actual: &str) {
        let expected: serde_json::Value = serde_json::from_str(expected).unwrap();
        let actual: serde_json::Value = serde_json::from_str(actual).unwrap();
        assert_eq!(expected, actual);
    }

    // ------------------------------------------------------------------
    // Binary pack/unpack round trips on hand-built IR
    // ------------------------------------------------------------------

    #[test]
    fn test_round_trip_simple() {
        let header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::String("Alice".to_string()),
            SchemaValue::U64(2),
            SchemaValue::String("Bob".to_string()),
        ];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_all_types() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("u64_field", FieldType::U64),
                FieldDef::new("i64_field", FieldType::I64),
                FieldDef::new("f64_field", FieldType::F64),
                FieldDef::new("string_field", FieldType::String),
                FieldDef::new("bool_field", FieldType::Bool),
            ],
        );
        let values = vec![
            SchemaValue::U64(42),
            SchemaValue::I64(-42),
            SchemaValue::F64(std::f64::consts::PI),
            SchemaValue::String("test".to_string()),
            SchemaValue::Bool(true),
        ];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_with_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);

        let ir = IntermediateRepresentation::new(header, vec![SchemaValue::U64(42)]).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_with_nulls() {
        let mut header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );

        // One bit per value: 2 rows * 2 fields = 4 values, which fits in a single byte.
        let total_values: usize = 2 * 2;
        let mut null_bitmap = vec![0u8; total_values.div_ceil(8)];
        // Flag the second value (row 0, field 1) as null.
        null_bitmap[0] |= 1 << 1;

        header.null_bitmap = Some(null_bitmap);
        header.set_flag(FLAG_HAS_NULLS);

        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::Null, // matches the bit set in the bitmap above
            SchemaValue::U64(2),
            SchemaValue::String("Bob".to_string()),
        ];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "tags",
                FieldType::Array(Box::new(FieldType::U64)),
            )],
        );
        let values = vec![SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ])];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_large_values() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("large_u64", FieldType::U64),
                FieldDef::new("large_i64", FieldType::I64),
            ],
        );
        let values = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_empty_string() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("name", FieldType::String)]);
        let values = vec![SchemaValue::String("".to_string())];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_round_trip_multiple_rows() {
        let header = SchemaHeader::new(
            3,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("score", FieldType::F64),
                FieldDef::new("active", FieldType::Bool),
            ],
        );
        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::F64(95.5),
            SchemaValue::Bool(true),
            SchemaValue::U64(2),
            SchemaValue::F64(87.3),
            SchemaValue::Bool(false),
            SchemaValue::U64(3),
            SchemaValue::F64(92.1),
            SchemaValue::Bool(true),
        ];

        let ir = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&ir);
    }

    #[test]
    fn test_invalid_data() {
        // Empty input must be reported as running out of data.
        let result = unpack(&[]);
        assert!(matches!(
            result,
            Err(SchemaError::UnexpectedEndOfData { .. })
        ));

        // Truncated input must be rejected, whatever the specific error.
        let result = unpack(&[0, 1, 2]);
        assert!(result.is_err());
    }

    // ------------------------------------------------------------------
    // JSON → IR → binary → IR → JSON
    // ------------------------------------------------------------------

    #[test]
    fn test_json_full_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let ir = JsonParser::parse(input).unwrap();
        let binary = pack(&ir);

        // Also exercise the (no-op) compression prefix layer in both directions.
        let compressed = compression::compress_with_prefix(&binary, None).unwrap();
        let decompressed = compression::decompress_with_prefix(&compressed).unwrap();

        let ir2 = unpack(&decompressed).unwrap();
        let output = JsonSerializer::serialize(&ir2, false).unwrap();

        assert_same_json(input, &output);
    }

    #[test]
    fn test_json_simple_object() {
        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
        let ir = JsonParser::parse(input).unwrap();

        assert_same_json(input, &json_via_binary(&ir));
    }

    #[test]
    fn test_json_swapi_nested_arrays() {
        // SWAPI-like data with nested arrays of primitives (stored inline).
        let input = r#"{"people":[{"name":"Luke","height":"172","films":["film/1","film/2"],"vehicles":[]},{"name":"C-3PO","height":"167","films":["film/1","film/2","film/3"],"vehicles":[]}]}"#;
        let ir = JsonParser::parse(input).unwrap();

        // Check the fiche representation in readable mode so field names can be matched.
        let fiche_output = fiche::serialize_readable(&ir, false).unwrap();
        assert!(fiche_output.starts_with("@people"));
        // Primitive arrays appear inline as superscript type + ⟦⟧.
        assert!(fiche_output.contains("filmsˢ⟦⟧"));
        assert!(fiche_output.contains("vehiclesˢ⟦⟧"));

        // Binary round trip: the primitive arrays must come back as real JSON arrays.
        let output = json_via_binary(&ir);
        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
        let people = output_value
            .as_object()
            .unwrap()
            .get("people")
            .unwrap()
            .as_array()
            .unwrap();

        let luke = &people[0];
        assert_eq!(luke["name"], "Luke");
        assert_eq!(luke["height"], "172");
        let luke_films = luke["films"].as_array().unwrap();
        assert_eq!(luke_films[0], "film/1");
        assert_eq!(luke_films[1], "film/2");
    }

    #[test]
    fn test_json_wrapper_keys() {
        // Common pagination wrapper keys should be unwrapped into a root key.
        let test_cases = [
            r#"{"results":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            r#"{"data":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            r#"{"items":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
            r#"{"records":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
        ];

        for input in test_cases {
            let ir = JsonParser::parse(input).unwrap();

            // The wrapper key must be present and be one of the known wrapper names.
            assert!(matches!(
                ir.header.root_key.as_deref(),
                Some("results" | "data" | "items" | "records")
            ));

            // The wrapped array was unwrapped into two rows.
            assert_eq!(ir.header.row_count, 2);

            // The round trip must reproduce the original document, wrapper included.
            assert_same_json(input, &json_via_binary(&ir));
        }
    }

    #[test]
    fn test_json_nested_objects() {
        let input = r#"{"user":{"profile":{"name":"alice","age":30}}}"#;
        let ir = JsonParser::parse(input).unwrap();

        assert_same_json(input, &json_via_binary(&ir));
    }

    #[test]
    fn test_json_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;
        let ir = JsonParser::parse(input).unwrap();
        assert!(ir.header.has_flag(FLAG_HAS_NULLS));

        assert_same_json(input, &json_via_binary(&ir));
    }

    #[test]
    fn test_json_with_arrays() {
        let input = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
        let ir = JsonParser::parse(input).unwrap();
        let output = json_via_binary(&ir);

        // Primitive arrays must be reconstructed as arrays, identical to the input.
        let expected = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
        assert_same_json(expected, &output);
    }

    // ------------------------------------------------------------------
    // Public encode_schema / decode_schema API
    // ------------------------------------------------------------------

    #[test]
    fn test_encode_schema_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        // The payload must be wrapped in the frame delimiters.
        assert!(encoded.starts_with(frame::FRAME_START));
        assert!(encoded.ends_with(frame::FRAME_END));

        let decoded = decode_schema(&encoded, false).unwrap();
        assert_same_json(input, &decoded);
    }

    #[test]
    fn test_encode_schema_simple() {
        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
        assert_same_json(input, &json_via_schema(input));
    }

    #[test]
    fn test_encode_schema_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;
        assert_same_json(input, &json_via_schema(input));
    }

    #[test]
    fn test_encode_schema_empty_object() {
        // Smoke test only: whether `{}` is accepted or rejected is decided by JsonParser,
        // so this just checks that encoding does not panic and records the outcome.
        let input = r#"{}"#;
        let result = encode_schema(input, None);
        println!("Empty object result: {:?}", result);
    }

    #[test]
    fn test_decode_schema_invalid_frame() {
        let invalid = "not_framed_data";
        let result = decode_schema(invalid, false);
        assert!(matches!(result, Err(SchemaError::InvalidFrame(_))));
    }

    #[test]
    fn test_decode_schema_invalid_chars() {
        // Well-formed frame around a payload of characters outside the display96 alphabet.
        let invalid = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
        let result = decode_schema(&invalid, false);
        assert!(matches!(result, Err(SchemaError::InvalidCharacter(_))));
    }

    #[test]
    fn test_visual_wire_format() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        println!("\n=== Visual Wire Format ===");
        println!("Input JSON: {}", input);
        println!("Input length: {} bytes", input.len());
        println!("\nEncoded output: {}", encoded);
        println!(
            "Encoded length: {} chars ({} bytes UTF-8)",
            encoded.chars().count(),
            encoded.len()
        );

        // Ratio of input bytes to encoded UTF-8 bytes (informational only).
        let compression_ratio = input.len() as f64 / encoded.len() as f64;
        println!("Compression ratio: {:.2}x", compression_ratio);

        let decoded = decode_schema(&encoded, false).unwrap();
        assert_same_json(input, &decoded);
        println!("Roundtrip verified ✓\n");
    }

    #[test]
    fn test_compression_comparison() {
        let test_cases = [
            r#"{"id":1}"#,
            r#"{"id":1,"name":"alice"}"#,
            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
        ];

        // Informational report; the only hard requirement is that every case encodes.
        println!("\n=== Compression Comparison ===");
        for (i, input) in test_cases.iter().enumerate() {
            let encoded = encode_schema(input, None).unwrap();
            let ratio = input.len() as f64 / encoded.len() as f64;

            println!(
                "Test case {}: {} bytes → {} bytes ({:.2}x)",
                i + 1,
                input.len(),
                encoded.len(),
                ratio
            );
        }
        println!();
    }

    #[test]
    fn test_encode_schema_with_compression() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;
        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();

        // Every algorithm must round-trip; decode_schema is not told which one was used.
        for algo in [
            SchemaCompressionAlgo::Brotli,
            SchemaCompressionAlgo::Lz4,
            SchemaCompressionAlgo::Zstd,
        ] {
            let encoded = encode_schema(input, Some(algo)).unwrap();
            let decoded = decode_schema(&encoded, false).unwrap();

            let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
            assert_eq!(
                input_value, output_value,
                "Failed for compression algorithm: {:?}",
                algo
            );
        }
    }

    #[test]
    fn test_compression_size_comparison() {
        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;

        // Informational report; the only hard requirement is that every variant encodes.
        println!("\n=== Compression Size Comparison ===");
        println!("Input JSON: {} bytes", input.len());

        let no_compress = encode_schema(input, None).unwrap();
        println!("No compression: {} bytes", no_compress.len());

        for algo in [
            SchemaCompressionAlgo::Brotli,
            SchemaCompressionAlgo::Lz4,
            SchemaCompressionAlgo::Zstd,
        ] {
            let compressed = encode_schema(input, Some(algo)).unwrap();
            let ratio = no_compress.len() as f64 / compressed.len() as f64;
            println!(
                "{:?}: {} bytes ({:.2}x vs uncompressed)",
                algo,
                compressed.len(),
                ratio
            );
        }
        println!();
    }

    // ------------------------------------------------------------------
    // Nested objects through fiche (and once through the schema pipeline)
    // ------------------------------------------------------------------

    #[test]
    fn test_nested_object_roundtrip_single_level() {
        let input = r#"{"id":"A1","name":"Jim","grade":{"math":60,"physics":66,"chemistry":61}}"#;
        let ir = JsonParser::parse(input).unwrap();

        // Readable fiche: nested fields are flattened with ჻ and carry superscript types.
        let fiche = fiche::serialize_readable(&ir, false).unwrap();
        assert!(fiche.contains("grade჻mathⁱ"));
        assert!(fiche.contains("grade჻physicsⁱ"));
        assert!(fiche.contains("grade჻chemistryⁱ"));

        // The round trip itself uses the tokenized format.
        assert_same_json(input, &json_via_fiche(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_deep() {
        let input = r#"{"a":{"b":{"c":{"d":42}}}}"#;
        let ir = JsonParser::parse(input).unwrap();

        // Four levels of nesting collapse into a single ჻-joined field name.
        let fiche = fiche::serialize_readable(&ir, false).unwrap();
        assert!(fiche.contains("a჻b჻c჻dⁱ"));

        // The round trip itself uses the tokenized format.
        assert_same_json(input, &json_via_fiche(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_array_of_objects() {
        let input = r#"{"students":[{"id":"A1","name":"Jim","grade":{"math":60,"physics":66}},{"id":"B2","name":"Sara","grade":{"math":85,"physics":90}}]}"#;
        let ir = JsonParser::parse(input).unwrap();

        // Readable fiche: root key first, nested fields flattened with superscript types.
        let fiche = fiche::serialize_readable(&ir, false).unwrap();
        assert!(fiche.starts_with("@students"));
        assert!(fiche.contains("grade჻mathⁱ"));
        assert!(fiche.contains("grade჻physicsⁱ"));

        // The round trip itself uses the tokenized format.
        assert_same_json(input, &json_via_fiche(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_mixed_with_arrays() {
        // Object nesting combined with an inline primitive array.
        let input = r#"{"person":{"name":"Alice","tags":["admin","user"],"address":{"city":"Boston","zip":"02101"}}}"#;
        let ir = JsonParser::parse(input).unwrap();

        let fiche = fiche::serialize_readable(&ir, false).unwrap();
        assert!(fiche.contains("person჻nameˢ"));
        // Primitive arrays appear inline as superscript type + ⟦⟧.
        assert!(fiche.contains("person჻tagsˢ⟦⟧"));
        assert!(fiche.contains("person჻address჻cityˢ"));
        assert!(fiche.contains("person჻address჻zipˢ"));

        // The round trip uses the tokenized format; arrays must come back as arrays.
        let expected = r#"{"person":{"address":{"city":"Boston","zip":"02101"},"name":"Alice","tags":["admin","user"]}}"#;
        assert_same_json(expected, &json_via_fiche(&ir));
    }

    #[test]
    fn test_nested_object_roundtrip_schema_encode() {
        let input = r#"{"data":{"user":{"profile":{"name":"alice","age":30}}}}"#;

        // Full schema pipeline: JSON → IR → binary → display96 → framed, and back.
        assert_same_json(input, &json_via_schema(input));
    }
}