base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod fiche;
6pub mod fiche_analyzer;
7pub mod frame;
8pub mod parsers;
9pub mod serializers;
10pub mod types;
11
12#[cfg(test)]
13mod edge_cases;
14
15// Re-export key types for convenience
16pub use binary_packer::pack;
17pub use binary_unpacker::unpack;
18pub use compression::SchemaCompressionAlgo;
19pub use frame::{decode_framed, encode_framed};
20pub use parsers::{InputParser, JsonParser};
21pub use serializers::{JsonSerializer, OutputSerializer};
22pub use types::{
23    FieldDef, FieldType, IntermediateRepresentation, SchemaError, SchemaHeader, SchemaValue,
24};
25
26// Re-export fiche functions for library users
27#[allow(unused_imports)]
28pub use fiche::{parse as parse_fiche, serialize as serialize_fiche};
29
30/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
31///
32/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
33/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
34/// alphabet of box-drawing and geometric shapes.
35///
36/// # Arguments
37///
38/// * `json` - JSON string to encode (must be object or array of objects)
39/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
40///
41/// # Returns
42///
43/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
44///
45/// # Errors
46///
47/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
48/// * `SchemaError::Compression` - Compression failure
49///
50/// # Example
51///
52/// ```ignore
53/// use base_d::{encode_schema, SchemaCompressionAlgo};
54///
55/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
56///
57/// // Without compression
58/// let encoded = encode_schema(json, None)?;
59/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
60///
61/// // With brotli compression
62/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
63/// ```
64///
65/// # See Also
66///
67/// * [`decode_schema`] - Decode schema format back to JSON
68/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
69pub fn encode_schema(
70    json: &str,
71    compress: Option<SchemaCompressionAlgo>,
72) -> Result<String, SchemaError> {
73    use parsers::{InputParser, JsonParser};
74
75    let ir = JsonParser::parse(json)?;
76    let binary = pack(&ir);
77    let compressed = compression::compress_with_prefix(&binary, compress)?;
78    Ok(frame::encode_framed(&compressed))
79}
80
81/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
82///
83/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
84/// display-safe wire format. Automatically detects and handles compression.
85///
86/// # Arguments
87///
88/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
89/// * `pretty` - Pretty-print JSON output with indentation
90///
91/// # Returns
92///
93/// Returns the decoded JSON string (minified or pretty-printed)
94///
95/// # Errors
96///
97/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
98/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
99/// * `SchemaError::Decompression` - Decompression failure
100/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
101/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
102///
103/// # Example
104///
105/// ```ignore
106/// use base_d::decode_schema;
107///
108/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
109///
110/// // Minified output
111/// let json = decode_schema(encoded, false)?;
112/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
113///
114/// // Pretty-printed output
115/// let pretty = decode_schema(encoded, true)?;
116/// println!("{}", pretty);
117/// // {
118/// //   "users": [
119/// //     {"id": 1, "name": "alice"}
120/// //   ]
121/// // }
122/// ```
123///
124/// # See Also
125///
126/// * [`encode_schema`] - Encode JSON to schema format
127/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
128pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
129    use serializers::{JsonSerializer, OutputSerializer};
130
131    let compressed = frame::decode_framed(encoded)?;
132    let binary = compression::decompress_with_prefix(&compressed)?;
133    let ir = unpack(&binary)?;
134    JsonSerializer::serialize(&ir, pretty)
135}
136
137/// Encode JSON to fiche format: JSON → IR → fiche
138///
139/// Transforms JSON into a model-readable structured format using Unicode delimiters.
140/// Unlike carrier98 (opaque binary), fiche is designed for models to parse directly.
141///
142/// # Format
143///
144/// ```text
145/// @{root}┃{field}:{type}┃{field}:{type}...
146/// ◉{value}┃{value}┃{value}...
147/// ```
148///
149/// # Example
150///
151/// ```ignore
152/// use base_d::encode_fiche;
153///
154/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
155/// let fiche = encode_fiche(json)?;
156/// // @users┃id:int┃name:str
157/// // ◉1┃alice
158/// ```
159pub fn encode_fiche(json: &str, minify: bool) -> Result<String, SchemaError> {
160    encode_fiche_with_options(json, minify, true, true)
161}
162
163pub fn encode_fiche_minified(json: &str) -> Result<String, SchemaError> {
164    encode_fiche_with_options(json, true, true, true)
165}
166
167/// Encode JSON to fiche without tokenization (human-readable field names)
168pub fn encode_fiche_readable(json: &str, minify: bool) -> Result<String, SchemaError> {
169    encode_fiche_with_options(json, minify, false, false)
170}
171
172/// Encode JSON to fiche with field tokenization only (no value dictionary)
173pub fn encode_fiche_light(json: &str, minify: bool) -> Result<String, SchemaError> {
174    encode_fiche_with_options(json, minify, true, false)
175}
176
177/// Encode JSON to fiche path mode (one line per leaf value)
178pub fn encode_fiche_path(json: &str) -> Result<String, SchemaError> {
179    fiche::serialize_path_mode(json)
180}
181
182/// Decode fiche path mode to JSON
183pub fn decode_fiche_path(path_input: &str) -> Result<String, SchemaError> {
184    fiche::parse_path_mode(path_input)
185}
186
187fn encode_fiche_with_options(
188    json: &str,
189    minify: bool,
190    tokenize_fields: bool,
191    tokenize_values: bool,
192) -> Result<String, SchemaError> {
193    use parsers::{InputParser, JsonParser};
194
195    let ir = JsonParser::parse(json)?;
196    match (tokenize_fields, tokenize_values) {
197        (true, true) => fiche::serialize(&ir, minify),
198        (true, false) => fiche::serialize_light(&ir, minify),
199        (false, false) => fiche::serialize_readable(&ir, minify),
200        (false, true) => {
201            // Invalid: can't tokenize values without tokenizing fields
202            fiche::serialize_readable(&ir, minify)
203        }
204    }
205}
206
207/// Decode fiche format to JSON: fiche → IR → JSON
208///
209/// Reverses the fiche encoding to reconstruct JSON from the model-readable format.
210///
211/// # Example
212///
213/// ```ignore
214/// use base_d::decode_fiche;
215///
216/// let fiche = "@users┃id:int┃name:str\n◉1┃alice";
217/// let json = decode_fiche(fiche, false)?;
218/// // {"users":[{"id":1,"name":"alice"}]}
219/// ```
220pub fn decode_fiche(fiche_input: &str, pretty: bool) -> Result<String, SchemaError> {
221    use serializers::{JsonSerializer, OutputSerializer};
222
223    let ir = fiche::parse(fiche_input)?;
224    JsonSerializer::serialize(&ir, pretty)
225}
226
227#[cfg(test)]
228mod integration_tests {
229    use super::*;
230    use crate::encoders::algorithms::schema::types::{
231        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
232        SchemaHeader, SchemaValue,
233    };
234    use parsers::{InputParser, JsonParser};
235    use serializers::{JsonSerializer, OutputSerializer};
236
237    #[test]
238    fn test_round_trip_simple() {
239        let fields = vec![
240            FieldDef::new("id", FieldType::U64),
241            FieldDef::new("name", FieldType::String),
242        ];
243        let header = SchemaHeader::new(2, fields);
244
245        let values = vec![
246            SchemaValue::U64(1),
247            SchemaValue::String("Alice".to_string()),
248            SchemaValue::U64(2),
249            SchemaValue::String("Bob".to_string()),
250        ];
251
252        let original = IntermediateRepresentation::new(header, values).unwrap();
253
254        // Pack and unpack
255        let packed = pack(&original);
256        let unpacked = unpack(&packed).unwrap();
257
258        assert_eq!(original, unpacked);
259    }
260
261    #[test]
262    fn test_round_trip_all_types() {
263        let fields = vec![
264            FieldDef::new("u64_field", FieldType::U64),
265            FieldDef::new("i64_field", FieldType::I64),
266            FieldDef::new("f64_field", FieldType::F64),
267            FieldDef::new("string_field", FieldType::String),
268            FieldDef::new("bool_field", FieldType::Bool),
269        ];
270        let header = SchemaHeader::new(1, fields);
271
272        let values = vec![
273            SchemaValue::U64(42),
274            SchemaValue::I64(-42),
275            SchemaValue::F64(std::f64::consts::PI),
276            SchemaValue::String("test".to_string()),
277            SchemaValue::Bool(true),
278        ];
279
280        let original = IntermediateRepresentation::new(header, values).unwrap();
281
282        let packed = pack(&original);
283        let unpacked = unpack(&packed).unwrap();
284
285        assert_eq!(original, unpacked);
286    }
287
288    #[test]
289    fn test_round_trip_with_root_key() {
290        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
291        header.root_key = Some("users".to_string());
292        header.set_flag(FLAG_HAS_ROOT_KEY);
293
294        let values = vec![SchemaValue::U64(42)];
295        let original = IntermediateRepresentation::new(header, values).unwrap();
296
297        let packed = pack(&original);
298        let unpacked = unpack(&packed).unwrap();
299
300        assert_eq!(original, unpacked);
301    }
302
303    #[test]
304    fn test_round_trip_with_nulls() {
305        let mut header = SchemaHeader::new(
306            2,
307            vec![
308                FieldDef::new("id", FieldType::U64),
309                FieldDef::new("name", FieldType::String),
310            ],
311        );
312
313        // Mark second value as null (row 0, field 1)
314        let total_values: usize = 2 * 2; // 2 rows * 2 fields = 4 values
315        let bitmap_bytes = total_values.div_ceil(8); // 1 byte
316        let mut null_bitmap = vec![0u8; bitmap_bytes];
317        null_bitmap[0] |= 1 << 1; // Set bit 1 (second value)
318
319        header.null_bitmap = Some(null_bitmap);
320        header.set_flag(FLAG_HAS_NULLS);
321
322        let values = vec![
323            SchemaValue::U64(1),
324            SchemaValue::Null, // This is marked as null in bitmap
325            SchemaValue::U64(2),
326            SchemaValue::String("Bob".to_string()),
327        ];
328
329        let original = IntermediateRepresentation::new(header, values).unwrap();
330
331        let packed = pack(&original);
332        let unpacked = unpack(&packed).unwrap();
333
334        assert_eq!(original, unpacked);
335    }
336
337    #[test]
338    fn test_round_trip_array() {
339        let fields = vec![FieldDef::new(
340            "tags",
341            FieldType::Array(Box::new(FieldType::U64)),
342        )];
343        let header = SchemaHeader::new(1, fields);
344
345        let values = vec![SchemaValue::Array(vec![
346            SchemaValue::U64(1),
347            SchemaValue::U64(2),
348            SchemaValue::U64(3),
349        ])];
350
351        let original = IntermediateRepresentation::new(header, values).unwrap();
352
353        let packed = pack(&original);
354        let unpacked = unpack(&packed).unwrap();
355
356        assert_eq!(original, unpacked);
357    }
358
359    #[test]
360    fn test_round_trip_large_values() {
361        let fields = vec![
362            FieldDef::new("large_u64", FieldType::U64),
363            FieldDef::new("large_i64", FieldType::I64),
364        ];
365        let header = SchemaHeader::new(1, fields);
366
367        let values = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];
368
369        let original = IntermediateRepresentation::new(header, values).unwrap();
370
371        let packed = pack(&original);
372        let unpacked = unpack(&packed).unwrap();
373
374        assert_eq!(original, unpacked);
375    }
376
377    #[test]
378    fn test_round_trip_empty_string() {
379        let fields = vec![FieldDef::new("name", FieldType::String)];
380        let header = SchemaHeader::new(1, fields);
381
382        let values = vec![SchemaValue::String("".to_string())];
383
384        let original = IntermediateRepresentation::new(header, values).unwrap();
385
386        let packed = pack(&original);
387        let unpacked = unpack(&packed).unwrap();
388
389        assert_eq!(original, unpacked);
390    }
391
392    #[test]
393    fn test_round_trip_multiple_rows() {
394        let fields = vec![
395            FieldDef::new("id", FieldType::U64),
396            FieldDef::new("score", FieldType::F64),
397            FieldDef::new("active", FieldType::Bool),
398        ];
399        let header = SchemaHeader::new(3, fields);
400
401        let values = vec![
402            SchemaValue::U64(1),
403            SchemaValue::F64(95.5),
404            SchemaValue::Bool(true),
405            SchemaValue::U64(2),
406            SchemaValue::F64(87.3),
407            SchemaValue::Bool(false),
408            SchemaValue::U64(3),
409            SchemaValue::F64(92.1),
410            SchemaValue::Bool(true),
411        ];
412
413        let original = IntermediateRepresentation::new(header, values).unwrap();
414
415        let packed = pack(&original);
416        let unpacked = unpack(&packed).unwrap();
417
418        assert_eq!(original, unpacked);
419    }
420
421    #[test]
422    fn test_invalid_data() {
423        // Empty data
424        let result = unpack(&[]);
425        assert!(matches!(
426            result,
427            Err(SchemaError::UnexpectedEndOfData { .. })
428        ));
429
430        // Truncated data
431        let result = unpack(&[0, 1, 2]);
432        assert!(result.is_err());
433    }
434
435    #[test]
436    fn test_json_full_roundtrip() {
437        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
438        let ir = JsonParser::parse(input).unwrap();
439        let binary = pack(&ir);
440        let compressed = compression::compress_with_prefix(&binary, None).unwrap();
441        let decompressed = compression::decompress_with_prefix(&compressed).unwrap();
442        let ir2 = unpack(&decompressed).unwrap();
443        let output = JsonSerializer::serialize(&ir2, false).unwrap();
444
445        // Parse both as serde_json::Value and compare (order-independent)
446        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
447        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
448        assert_eq!(input_value, output_value);
449    }
450
451    #[test]
452    fn test_json_simple_object() {
453        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
454        let ir = JsonParser::parse(input).unwrap();
455        let binary = pack(&ir);
456        let ir2 = unpack(&binary).unwrap();
457        let output = JsonSerializer::serialize(&ir2, false).unwrap();
458
459        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
460        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
461        assert_eq!(input_value, output_value);
462    }
463
464    #[test]
465    fn test_json_swapi_nested_arrays() {
466        // SWAPI-like data with nested arrays of primitives
467        // Primitive arrays now stored inline
468        let input = r#"{"people":[{"name":"Luke","height":"172","films":["film/1","film/2"],"vehicles":[]},{"name":"C-3PO","height":"167","films":["film/1","film/2","film/3"],"vehicles":[]}]}"#;
469        let ir = JsonParser::parse(input).unwrap();
470
471        // Verify fiche representation (readable mode for string matching)
472        let fiche_output = fiche::serialize_readable(&ir, false).unwrap();
473
474        // Should have @people root key
475        assert!(fiche_output.starts_with("@people"));
476        // Primitive arrays now inline with superscript + ⟦⟧ syntax
477        assert!(fiche_output.contains("filmsˢ⟦⟧"));
478        assert!(fiche_output.contains("vehiclesˢ⟦⟧"));
479
480        // Verify round trip - arrays become indexed objects
481        let binary = pack(&ir);
482        let ir2 = unpack(&binary).unwrap();
483        let output = JsonSerializer::serialize(&ir2, false).unwrap();
484
485        // Parse output and verify structure
486        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
487        let people = output_value
488            .as_object()
489            .unwrap()
490            .get("people")
491            .unwrap()
492            .as_array()
493            .unwrap();
494
495        // First person has films as properly reconstructed array
496        let luke = &people[0];
497        assert_eq!(luke["name"], "Luke");
498        assert_eq!(luke["height"], "172");
499        let luke_films = luke["films"].as_array().unwrap();
500        assert_eq!(luke_films[0], "film/1");
501        assert_eq!(luke_films[1], "film/2");
502    }
503
504    #[test]
505    fn test_json_wrapper_keys() {
506        // Test common pagination wrapper keys get unwrapped
507        let test_cases = vec![
508            r#"{"results":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
509            r#"{"data":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
510            r#"{"items":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
511            r#"{"records":[{"id":1,"name":"a"},{"id":2,"name":"b"}]}"#,
512        ];
513
514        for input in test_cases {
515            let ir = JsonParser::parse(input).unwrap();
516
517            // Should have root key from wrapper
518            assert!(ir.header.root_key.is_some());
519            let root = ir.header.root_key.as_ref().unwrap();
520            assert!(root == "results" || root == "data" || root == "items" || root == "records");
521
522            // Should have 2 rows (unwrapped the array)
523            assert_eq!(ir.header.row_count, 2);
524
525            // Round trip should preserve data
526            let binary = pack(&ir);
527            let ir2 = unpack(&binary).unwrap();
528            let output = JsonSerializer::serialize(&ir2, false).unwrap();
529
530            let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
531            let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
532            assert_eq!(input_value, output_value);
533        }
534    }
535
536    #[test]
537    fn test_json_nested_objects() {
538        let input = r#"{"user":{"profile":{"name":"alice","age":30}}}"#;
539        let ir = JsonParser::parse(input).unwrap();
540        let binary = pack(&ir);
541        let ir2 = unpack(&binary).unwrap();
542        let output = JsonSerializer::serialize(&ir2, false).unwrap();
543
544        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
545        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
546        assert_eq!(input_value, output_value);
547    }
548
549    #[test]
550    fn test_json_with_nulls() {
551        let input = r#"{"name":"alice","age":null,"active":true}"#;
552        let ir = JsonParser::parse(input).unwrap();
553        assert!(ir.header.has_flag(FLAG_HAS_NULLS));
554
555        let binary = pack(&ir);
556        let ir2 = unpack(&binary).unwrap();
557        let output = JsonSerializer::serialize(&ir2, false).unwrap();
558
559        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
560        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
561        assert_eq!(input_value, output_value);
562    }
563
564    #[test]
565    fn test_json_with_arrays() {
566        // Arrays now flatten to indexed objects
567        let input = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
568        let ir = JsonParser::parse(input).unwrap();
569        let binary = pack(&ir);
570        let ir2 = unpack(&binary).unwrap();
571        let output = JsonSerializer::serialize(&ir2, false).unwrap();
572
573        // Expected: arrays are properly reconstructed as arrays
574        let expected = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
575        let expected_value: serde_json::Value = serde_json::from_str(expected).unwrap();
576        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
577        assert_eq!(expected_value, output_value);
578    }
579
580    #[test]
581    fn test_encode_schema_roundtrip() {
582        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
583        let encoded = encode_schema(input, None).unwrap();
584
585        // Validate frame delimiters
586        assert!(encoded.starts_with(frame::FRAME_START));
587        assert!(encoded.ends_with(frame::FRAME_END));
588
589        // Decode back to JSON
590        let decoded = decode_schema(&encoded, false).unwrap();
591
592        // Compare as JSON values (order-independent)
593        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
594        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
595        assert_eq!(input_value, output_value);
596    }
597
598    #[test]
599    fn test_encode_schema_simple() {
600        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
601        let encoded = encode_schema(input, None).unwrap();
602        let decoded = decode_schema(&encoded, false).unwrap();
603
604        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
605        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
606        assert_eq!(input_value, output_value);
607    }
608
609    #[test]
610    fn test_encode_schema_with_nulls() {
611        let input = r#"{"name":"alice","age":null,"active":true}"#;
612        let encoded = encode_schema(input, None).unwrap();
613        let decoded = decode_schema(&encoded, false).unwrap();
614
615        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
616        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
617        assert_eq!(input_value, output_value);
618    }
619
620    #[test]
621    fn test_encode_schema_empty_object() {
622        let input = r#"{}"#;
623        let result = encode_schema(input, None);
624        // Empty objects should fail or handle gracefully
625        // This depends on JsonParser behavior
626        println!("Empty object result: {:?}", result);
627    }
628
629    #[test]
630    fn test_decode_schema_invalid_frame() {
631        let invalid = "not_framed_data";
632        let result = decode_schema(invalid, false);
633        assert!(matches!(result, Err(SchemaError::InvalidFrame(_))));
634    }
635
636    #[test]
637    fn test_decode_schema_invalid_chars() {
638        let invalid = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
639        let result = decode_schema(&invalid, false);
640        assert!(matches!(result, Err(SchemaError::InvalidCharacter(_))));
641    }
642
643    #[test]
644    fn test_visual_wire_format() {
645        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
646        let encoded = encode_schema(input, None).unwrap();
647
648        println!("\n=== Visual Wire Format ===");
649        println!("Input JSON: {}", input);
650        println!("Input length: {} bytes", input.len());
651        println!("\nEncoded output: {}", encoded);
652        println!(
653            "Encoded length: {} chars ({} bytes UTF-8)",
654            encoded.chars().count(),
655            encoded.len()
656        );
657
658        // Calculate compression ratio
659        let compression_ratio = input.len() as f64 / encoded.len() as f64;
660        println!("Compression ratio: {:.2}x", compression_ratio);
661
662        // Decode and verify
663        let decoded = decode_schema(&encoded, false).unwrap();
664        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
665        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
666        assert_eq!(input_value, output_value);
667        println!("Roundtrip verified ✓\n");
668    }
669
670    #[test]
671    fn test_compression_comparison() {
672        let test_cases = [
673            r#"{"id":1}"#,
674            r#"{"id":1,"name":"alice"}"#,
675            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
676            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
677        ];
678
679        println!("\n=== Compression Comparison ===");
680        for (i, input) in test_cases.iter().enumerate() {
681            let encoded = encode_schema(input, None).unwrap();
682            let ratio = input.len() as f64 / encoded.len() as f64;
683
684            println!(
685                "Test case {}: {} bytes → {} bytes ({:.2}x)",
686                i + 1,
687                input.len(),
688                encoded.len(),
689                ratio
690            );
691        }
692        println!();
693    }
694
695    #[test]
696    fn test_encode_schema_with_compression() {
697        use super::SchemaCompressionAlgo;
698
699        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;
700
701        // Test each compression algorithm
702        for algo in [
703            SchemaCompressionAlgo::Brotli,
704            SchemaCompressionAlgo::Lz4,
705            SchemaCompressionAlgo::Zstd,
706        ] {
707            let encoded = encode_schema(input, Some(algo)).unwrap();
708            let decoded = decode_schema(&encoded, false).unwrap();
709
710            let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
711            let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
712            assert_eq!(
713                input_value, output_value,
714                "Failed for compression algorithm: {:?}",
715                algo
716            );
717        }
718    }
719
720    #[test]
721    fn test_compression_size_comparison() {
722        use super::SchemaCompressionAlgo;
723
724        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;
725
726        println!("\n=== Compression Size Comparison ===");
727        println!("Input JSON: {} bytes", input.len());
728
729        let no_compress = encode_schema(input, None).unwrap();
730        println!("No compression: {} bytes", no_compress.len());
731
732        for algo in [
733            SchemaCompressionAlgo::Brotli,
734            SchemaCompressionAlgo::Lz4,
735            SchemaCompressionAlgo::Zstd,
736        ] {
737            let compressed = encode_schema(input, Some(algo)).unwrap();
738            let ratio = no_compress.len() as f64 / compressed.len() as f64;
739            println!(
740                "{:?}: {} bytes ({:.2}x vs uncompressed)",
741                algo,
742                compressed.len(),
743                ratio
744            );
745        }
746        println!();
747    }
748
749    #[test]
750    fn test_nested_object_roundtrip_single_level() {
751        let input = r#"{"id":"A1","name":"Jim","grade":{"math":60,"physics":66,"chemistry":61}}"#;
752
753        // JSON → IR → fiche (readable for string matching)
754        let ir = JsonParser::parse(input).unwrap();
755        let fiche = fiche::serialize_readable(&ir, false).unwrap();
756
757        // Verify flattened field names with ჻ and superscript types
758        assert!(fiche.contains("grade჻mathⁱ"));
759        assert!(fiche.contains("grade჻physicsⁱ"));
760        assert!(fiche.contains("grade჻chemistryⁱ"));
761
762        // fiche → IR → JSON (using tokenized format for roundtrip)
763        let tokenized = fiche::serialize(&ir, false).unwrap();
764        let ir2 = fiche::parse(&tokenized).unwrap();
765        let output = JsonSerializer::serialize(&ir2, false).unwrap();
766
767        // Compare JSON
768        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
769        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
770        assert_eq!(input_value, output_value);
771    }
772
773    #[test]
774    fn test_nested_object_roundtrip_deep() {
775        let input = r#"{"a":{"b":{"c":{"d":42}}}}"#;
776
777        let ir = JsonParser::parse(input).unwrap();
778        let fiche = fiche::serialize_readable(&ir, false).unwrap();
779
780        // Verify deep nesting with ჻ and superscript type
781        assert!(fiche.contains("a჻b჻c჻dⁱ"));
782
783        // Roundtrip with tokenized format
784        let tokenized = fiche::serialize(&ir, false).unwrap();
785        let ir2 = fiche::parse(&tokenized).unwrap();
786        let output = JsonSerializer::serialize(&ir2, false).unwrap();
787
788        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
789        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
790        assert_eq!(input_value, output_value);
791    }
792
793    #[test]
794    fn test_nested_object_roundtrip_array_of_objects() {
795        let input = r#"{"students":[{"id":"A1","name":"Jim","grade":{"math":60,"physics":66}},{"id":"B2","name":"Sara","grade":{"math":85,"physics":90}}]}"#;
796
797        let ir = JsonParser::parse(input).unwrap();
798        let fiche = fiche::serialize_readable(&ir, false).unwrap();
799
800        // Verify root key and flattened nested fields with superscript types
801        assert!(fiche.starts_with("@students"));
802        assert!(fiche.contains("grade჻mathⁱ"));
803        assert!(fiche.contains("grade჻physicsⁱ"));
804
805        // Roundtrip with tokenized format
806        let tokenized = fiche::serialize(&ir, false).unwrap();
807        let ir2 = fiche::parse(&tokenized).unwrap();
808        let output = JsonSerializer::serialize(&ir2, false).unwrap();
809
810        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
811        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
812        assert_eq!(input_value, output_value);
813    }
814
815    #[test]
816    fn test_nested_object_roundtrip_mixed_with_arrays() {
817        // Primitive arrays now stored inline
818        let input = r#"{"person":{"name":"Alice","tags":["admin","user"],"address":{"city":"Boston","zip":"02101"}}}"#;
819
820        let ir = JsonParser::parse(input).unwrap();
821        let fiche = fiche::serialize_readable(&ir, false).unwrap();
822
823        // Verify both object nesting and inline primitive arrays with superscript types
824        assert!(fiche.contains("person჻nameˢ"));
825        // Primitive arrays now inline with superscript + ⟦⟧ syntax
826        assert!(fiche.contains("person჻tagsˢ⟦⟧"));
827        assert!(fiche.contains("person჻address჻cityˢ"));
828        assert!(fiche.contains("person჻address჻zipˢ"));
829
830        // Roundtrip with tokenized format
831        let tokenized = fiche::serialize(&ir, false).unwrap();
832        let ir2 = fiche::parse(&tokenized).unwrap();
833        let output = JsonSerializer::serialize(&ir2, false).unwrap();
834
835        // Arrays are properly reconstructed
836        let expected = r#"{"person":{"address":{"city":"Boston","zip":"02101"},"name":"Alice","tags":["admin","user"]}}"#;
837        let expected_value: serde_json::Value = serde_json::from_str(expected).unwrap();
838        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
839        assert_eq!(expected_value, output_value);
840    }
841
842    #[test]
843    fn test_nested_object_roundtrip_schema_encode() {
844        let input = r#"{"data":{"user":{"profile":{"name":"alice","age":30}}}}"#;
845
846        // Full schema pipeline: JSON → IR → binary → display96 → framed
847        let encoded = encode_schema(input, None).unwrap();
848        let decoded = decode_schema(&encoded, false).unwrap();
849
850        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
851        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
852        assert_eq!(input_value, output_value);
853    }
854}