base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod fiche;
6pub mod frame;
7pub mod parsers;
8pub mod serializers;
9pub mod types;
10
11#[cfg(test)]
12mod edge_cases;
13
14// Re-export key types for convenience
15pub use binary_packer::pack;
16pub use binary_unpacker::unpack;
17pub use compression::SchemaCompressionAlgo;
18pub use frame::{decode_framed, encode_framed};
19pub use parsers::{InputParser, JsonParser};
20pub use serializers::{JsonSerializer, OutputSerializer};
21pub use types::{
22    FieldDef, FieldType, IntermediateRepresentation, SchemaError, SchemaHeader, SchemaValue,
23};
24
25// Re-export fiche functions for library users
26#[allow(unused_imports)]
27pub use fiche::{parse as parse_fiche, serialize as serialize_fiche};
28
29/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
30///
31/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
32/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
33/// alphabet of box-drawing and geometric shapes.
34///
35/// # Arguments
36///
37/// * `json` - JSON string to encode (must be object or array of objects)
38/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
39///
40/// # Returns
41///
42/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
43///
44/// # Errors
45///
46/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
47/// * `SchemaError::Compression` - Compression failure
48///
49/// # Example
50///
51/// ```ignore
52/// use base_d::{encode_schema, SchemaCompressionAlgo};
53///
54/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
55///
56/// // Without compression
57/// let encoded = encode_schema(json, None)?;
58/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
59///
60/// // With brotli compression
61/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
62/// ```
63///
64/// # See Also
65///
66/// * [`decode_schema`] - Decode schema format back to JSON
67/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
68pub fn encode_schema(
69    json: &str,
70    compress: Option<SchemaCompressionAlgo>,
71) -> Result<String, SchemaError> {
72    use parsers::{InputParser, JsonParser};
73
74    let ir = JsonParser::parse(json)?;
75    let binary = pack(&ir);
76    let compressed = compression::compress_with_prefix(&binary, compress)?;
77    Ok(frame::encode_framed(&compressed))
78}
79
80/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
81///
82/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
83/// display-safe wire format. Automatically detects and handles compression.
84///
85/// # Arguments
86///
87/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
88/// * `pretty` - Pretty-print JSON output with indentation
89///
90/// # Returns
91///
92/// Returns the decoded JSON string (minified or pretty-printed)
93///
94/// # Errors
95///
96/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
97/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
98/// * `SchemaError::Decompression` - Decompression failure
99/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
100/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
101///
102/// # Example
103///
104/// ```ignore
105/// use base_d::decode_schema;
106///
107/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
108///
109/// // Minified output
110/// let json = decode_schema(encoded, false)?;
111/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
112///
113/// // Pretty-printed output
114/// let pretty = decode_schema(encoded, true)?;
115/// println!("{}", pretty);
116/// // {
117/// //   "users": [
118/// //     {"id": 1, "name": "alice"}
119/// //   ]
120/// // }
121/// ```
122///
123/// # See Also
124///
125/// * [`encode_schema`] - Encode JSON to schema format
126/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
127pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
128    use serializers::{JsonSerializer, OutputSerializer};
129
130    let compressed = frame::decode_framed(encoded)?;
131    let binary = compression::decompress_with_prefix(&compressed)?;
132    let ir = unpack(&binary)?;
133    JsonSerializer::serialize(&ir, pretty)
134}
135
136/// Encode JSON to fiche format: JSON → IR → fiche
137///
138/// Transforms JSON into a model-readable structured format using Unicode delimiters.
139/// Unlike carrier98 (opaque binary), fiche is designed for models to parse directly.
140///
141/// # Format
142///
143/// ```text
144/// @{root}┃{field}:{type}┃{field}:{type}...
145/// ◉{value}┃{value}┃{value}...
146/// ```
147///
148/// # Example
149///
150/// ```ignore
151/// use base_d::encode_fiche;
152///
153/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
154/// let fiche = encode_fiche(json)?;
155/// // @users┃id:int┃name:str
156/// // ◉1┃alice
157/// ```
158pub fn encode_fiche(json: &str) -> Result<String, SchemaError> {
159    use parsers::{InputParser, JsonParser};
160
161    let ir = JsonParser::parse(json)?;
162    fiche::serialize(&ir)
163}
164
165/// Decode fiche format to JSON: fiche → IR → JSON
166///
167/// Reverses the fiche encoding to reconstruct JSON from the model-readable format.
168///
169/// # Example
170///
171/// ```ignore
172/// use base_d::decode_fiche;
173///
174/// let fiche = "@users┃id:int┃name:str\n◉1┃alice";
175/// let json = decode_fiche(fiche, false)?;
176/// // {"users":[{"id":1,"name":"alice"}]}
177/// ```
178pub fn decode_fiche(fiche_input: &str, pretty: bool) -> Result<String, SchemaError> {
179    use serializers::{JsonSerializer, OutputSerializer};
180
181    let ir = fiche::parse(fiche_input)?;
182    JsonSerializer::serialize(&ir, pretty)
183}
184
185#[cfg(test)]
186mod integration_tests {
187    use super::*;
188    use crate::encoders::algorithms::schema::types::{
189        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
190        SchemaHeader, SchemaValue,
191    };
192    use parsers::{InputParser, JsonParser};
193    use serializers::{JsonSerializer, OutputSerializer};
194
195    #[test]
196    fn test_round_trip_simple() {
197        let fields = vec![
198            FieldDef::new("id", FieldType::U64),
199            FieldDef::new("name", FieldType::String),
200        ];
201        let header = SchemaHeader::new(2, fields);
202
203        let values = vec![
204            SchemaValue::U64(1),
205            SchemaValue::String("Alice".to_string()),
206            SchemaValue::U64(2),
207            SchemaValue::String("Bob".to_string()),
208        ];
209
210        let original = IntermediateRepresentation::new(header, values).unwrap();
211
212        // Pack and unpack
213        let packed = pack(&original);
214        let unpacked = unpack(&packed).unwrap();
215
216        assert_eq!(original, unpacked);
217    }
218
219    #[test]
220    fn test_round_trip_all_types() {
221        let fields = vec![
222            FieldDef::new("u64_field", FieldType::U64),
223            FieldDef::new("i64_field", FieldType::I64),
224            FieldDef::new("f64_field", FieldType::F64),
225            FieldDef::new("string_field", FieldType::String),
226            FieldDef::new("bool_field", FieldType::Bool),
227        ];
228        let header = SchemaHeader::new(1, fields);
229
230        let values = vec![
231            SchemaValue::U64(42),
232            SchemaValue::I64(-42),
233            SchemaValue::F64(std::f64::consts::PI),
234            SchemaValue::String("test".to_string()),
235            SchemaValue::Bool(true),
236        ];
237
238        let original = IntermediateRepresentation::new(header, values).unwrap();
239
240        let packed = pack(&original);
241        let unpacked = unpack(&packed).unwrap();
242
243        assert_eq!(original, unpacked);
244    }
245
246    #[test]
247    fn test_round_trip_with_root_key() {
248        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
249        header.root_key = Some("users".to_string());
250        header.set_flag(FLAG_HAS_ROOT_KEY);
251
252        let values = vec![SchemaValue::U64(42)];
253        let original = IntermediateRepresentation::new(header, values).unwrap();
254
255        let packed = pack(&original);
256        let unpacked = unpack(&packed).unwrap();
257
258        assert_eq!(original, unpacked);
259    }
260
261    #[test]
262    fn test_round_trip_with_nulls() {
263        let mut header = SchemaHeader::new(
264            2,
265            vec![
266                FieldDef::new("id", FieldType::U64),
267                FieldDef::new("name", FieldType::String),
268            ],
269        );
270
271        // Mark second value as null (row 0, field 1)
272        let total_values: usize = 2 * 2; // 2 rows * 2 fields = 4 values
273        let bitmap_bytes = total_values.div_ceil(8); // 1 byte
274        let mut null_bitmap = vec![0u8; bitmap_bytes];
275        null_bitmap[0] |= 1 << 1; // Set bit 1 (second value)
276
277        header.null_bitmap = Some(null_bitmap);
278        header.set_flag(FLAG_HAS_NULLS);
279
280        let values = vec![
281            SchemaValue::U64(1),
282            SchemaValue::Null, // This is marked as null in bitmap
283            SchemaValue::U64(2),
284            SchemaValue::String("Bob".to_string()),
285        ];
286
287        let original = IntermediateRepresentation::new(header, values).unwrap();
288
289        let packed = pack(&original);
290        let unpacked = unpack(&packed).unwrap();
291
292        assert_eq!(original, unpacked);
293    }
294
295    #[test]
296    fn test_round_trip_array() {
297        let fields = vec![FieldDef::new(
298            "tags",
299            FieldType::Array(Box::new(FieldType::U64)),
300        )];
301        let header = SchemaHeader::new(1, fields);
302
303        let values = vec![SchemaValue::Array(vec![
304            SchemaValue::U64(1),
305            SchemaValue::U64(2),
306            SchemaValue::U64(3),
307        ])];
308
309        let original = IntermediateRepresentation::new(header, values).unwrap();
310
311        let packed = pack(&original);
312        let unpacked = unpack(&packed).unwrap();
313
314        assert_eq!(original, unpacked);
315    }
316
317    #[test]
318    fn test_round_trip_large_values() {
319        let fields = vec![
320            FieldDef::new("large_u64", FieldType::U64),
321            FieldDef::new("large_i64", FieldType::I64),
322        ];
323        let header = SchemaHeader::new(1, fields);
324
325        let values = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];
326
327        let original = IntermediateRepresentation::new(header, values).unwrap();
328
329        let packed = pack(&original);
330        let unpacked = unpack(&packed).unwrap();
331
332        assert_eq!(original, unpacked);
333    }
334
335    #[test]
336    fn test_round_trip_empty_string() {
337        let fields = vec![FieldDef::new("name", FieldType::String)];
338        let header = SchemaHeader::new(1, fields);
339
340        let values = vec![SchemaValue::String("".to_string())];
341
342        let original = IntermediateRepresentation::new(header, values).unwrap();
343
344        let packed = pack(&original);
345        let unpacked = unpack(&packed).unwrap();
346
347        assert_eq!(original, unpacked);
348    }
349
350    #[test]
351    fn test_round_trip_multiple_rows() {
352        let fields = vec![
353            FieldDef::new("id", FieldType::U64),
354            FieldDef::new("score", FieldType::F64),
355            FieldDef::new("active", FieldType::Bool),
356        ];
357        let header = SchemaHeader::new(3, fields);
358
359        let values = vec![
360            SchemaValue::U64(1),
361            SchemaValue::F64(95.5),
362            SchemaValue::Bool(true),
363            SchemaValue::U64(2),
364            SchemaValue::F64(87.3),
365            SchemaValue::Bool(false),
366            SchemaValue::U64(3),
367            SchemaValue::F64(92.1),
368            SchemaValue::Bool(true),
369        ];
370
371        let original = IntermediateRepresentation::new(header, values).unwrap();
372
373        let packed = pack(&original);
374        let unpacked = unpack(&packed).unwrap();
375
376        assert_eq!(original, unpacked);
377    }
378
379    #[test]
380    fn test_invalid_data() {
381        // Empty data
382        let result = unpack(&[]);
383        assert!(matches!(
384            result,
385            Err(SchemaError::UnexpectedEndOfData { .. })
386        ));
387
388        // Truncated data
389        let result = unpack(&[0, 1, 2]);
390        assert!(result.is_err());
391    }
392
393    #[test]
394    fn test_json_full_roundtrip() {
395        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
396        let ir = JsonParser::parse(input).unwrap();
397        let binary = pack(&ir);
398        let compressed = compression::compress_with_prefix(&binary, None).unwrap();
399        let decompressed = compression::decompress_with_prefix(&compressed).unwrap();
400        let ir2 = unpack(&decompressed).unwrap();
401        let output = JsonSerializer::serialize(&ir2, false).unwrap();
402
403        // Parse both as serde_json::Value and compare (order-independent)
404        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
405        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
406        assert_eq!(input_value, output_value);
407    }
408
409    #[test]
410    fn test_json_simple_object() {
411        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
412        let ir = JsonParser::parse(input).unwrap();
413        let binary = pack(&ir);
414        let ir2 = unpack(&binary).unwrap();
415        let output = JsonSerializer::serialize(&ir2, false).unwrap();
416
417        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
418        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
419        assert_eq!(input_value, output_value);
420    }
421
422    #[test]
423    fn test_json_nested_objects() {
424        let input = r#"{"user":{"profile":{"name":"alice","age":30}}}"#;
425        let ir = JsonParser::parse(input).unwrap();
426        let binary = pack(&ir);
427        let ir2 = unpack(&binary).unwrap();
428        let output = JsonSerializer::serialize(&ir2, false).unwrap();
429
430        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
431        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
432        assert_eq!(input_value, output_value);
433    }
434
435    #[test]
436    fn test_json_with_nulls() {
437        let input = r#"{"name":"alice","age":null,"active":true}"#;
438        let ir = JsonParser::parse(input).unwrap();
439        assert!(ir.header.has_flag(FLAG_HAS_NULLS));
440
441        let binary = pack(&ir);
442        let ir2 = unpack(&binary).unwrap();
443        let output = JsonSerializer::serialize(&ir2, false).unwrap();
444
445        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
446        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
447        assert_eq!(input_value, output_value);
448    }
449
450    #[test]
451    fn test_json_with_arrays() {
452        let input = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
453        let ir = JsonParser::parse(input).unwrap();
454        let binary = pack(&ir);
455        let ir2 = unpack(&binary).unwrap();
456        let output = JsonSerializer::serialize(&ir2, false).unwrap();
457
458        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
459        let output_value: serde_json::Value = serde_json::from_str(&output).unwrap();
460        assert_eq!(input_value, output_value);
461    }
462
463    #[test]
464    fn test_encode_schema_roundtrip() {
465        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
466        let encoded = encode_schema(input, None).unwrap();
467
468        // Validate frame delimiters
469        assert!(encoded.starts_with(frame::FRAME_START));
470        assert!(encoded.ends_with(frame::FRAME_END));
471
472        // Decode back to JSON
473        let decoded = decode_schema(&encoded, false).unwrap();
474
475        // Compare as JSON values (order-independent)
476        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
477        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
478        assert_eq!(input_value, output_value);
479    }
480
481    #[test]
482    fn test_encode_schema_simple() {
483        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
484        let encoded = encode_schema(input, None).unwrap();
485        let decoded = decode_schema(&encoded, false).unwrap();
486
487        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
488        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
489        assert_eq!(input_value, output_value);
490    }
491
492    #[test]
493    fn test_encode_schema_with_nulls() {
494        let input = r#"{"name":"alice","age":null,"active":true}"#;
495        let encoded = encode_schema(input, None).unwrap();
496        let decoded = decode_schema(&encoded, false).unwrap();
497
498        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
499        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
500        assert_eq!(input_value, output_value);
501    }
502
503    #[test]
504    fn test_encode_schema_empty_object() {
505        let input = r#"{}"#;
506        let result = encode_schema(input, None);
507        // Empty objects should fail or handle gracefully
508        // This depends on JsonParser behavior
509        println!("Empty object result: {:?}", result);
510    }
511
512    #[test]
513    fn test_decode_schema_invalid_frame() {
514        let invalid = "not_framed_data";
515        let result = decode_schema(invalid, false);
516        assert!(matches!(result, Err(SchemaError::InvalidFrame(_))));
517    }
518
519    #[test]
520    fn test_decode_schema_invalid_chars() {
521        let invalid = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
522        let result = decode_schema(&invalid, false);
523        assert!(matches!(result, Err(SchemaError::InvalidCharacter(_))));
524    }
525
526    #[test]
527    fn test_visual_wire_format() {
528        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
529        let encoded = encode_schema(input, None).unwrap();
530
531        println!("\n=== Visual Wire Format ===");
532        println!("Input JSON: {}", input);
533        println!("Input length: {} bytes", input.len());
534        println!("\nEncoded output: {}", encoded);
535        println!(
536            "Encoded length: {} chars ({} bytes UTF-8)",
537            encoded.chars().count(),
538            encoded.len()
539        );
540
541        // Calculate compression ratio
542        let compression_ratio = input.len() as f64 / encoded.len() as f64;
543        println!("Compression ratio: {:.2}x", compression_ratio);
544
545        // Decode and verify
546        let decoded = decode_schema(&encoded, false).unwrap();
547        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
548        let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
549        assert_eq!(input_value, output_value);
550        println!("Roundtrip verified ✓\n");
551    }
552
553    #[test]
554    fn test_compression_comparison() {
555        let test_cases = [
556            r#"{"id":1}"#,
557            r#"{"id":1,"name":"alice"}"#,
558            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
559            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
560        ];
561
562        println!("\n=== Compression Comparison ===");
563        for (i, input) in test_cases.iter().enumerate() {
564            let encoded = encode_schema(input, None).unwrap();
565            let ratio = input.len() as f64 / encoded.len() as f64;
566
567            println!(
568                "Test case {}: {} bytes → {} bytes ({:.2}x)",
569                i + 1,
570                input.len(),
571                encoded.len(),
572                ratio
573            );
574        }
575        println!();
576    }
577
578    #[test]
579    fn test_encode_schema_with_compression() {
580        use super::SchemaCompressionAlgo;
581
582        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;
583
584        // Test each compression algorithm
585        for algo in [
586            SchemaCompressionAlgo::Brotli,
587            SchemaCompressionAlgo::Lz4,
588            SchemaCompressionAlgo::Zstd,
589        ] {
590            let encoded = encode_schema(input, Some(algo)).unwrap();
591            let decoded = decode_schema(&encoded, false).unwrap();
592
593            let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
594            let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
595            assert_eq!(
596                input_value, output_value,
597                "Failed for compression algorithm: {:?}",
598                algo
599            );
600        }
601    }
602
603    #[test]
604    fn test_compression_size_comparison() {
605        use super::SchemaCompressionAlgo;
606
607        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;
608
609        println!("\n=== Compression Size Comparison ===");
610        println!("Input JSON: {} bytes", input.len());
611
612        let no_compress = encode_schema(input, None).unwrap();
613        println!("No compression: {} bytes", no_compress.len());
614
615        for algo in [
616            SchemaCompressionAlgo::Brotli,
617            SchemaCompressionAlgo::Lz4,
618            SchemaCompressionAlgo::Zstd,
619        ] {
620            let compressed = encode_schema(input, Some(algo)).unwrap();
621            let ratio = no_compress.len() as f64 / compressed.len() as f64;
622            println!(
623                "{:?}: {} bytes ({:.2}x vs uncompressed)",
624                algo,
625                compressed.len(),
626                ratio
627            );
628        }
629        println!();
630    }
631}