base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod frame;
6pub mod parsers;
7pub mod serializers;
8pub mod types;
9
10#[cfg(test)]
11mod edge_cases;
12
13// Re-export key types for convenience
14pub use binary_packer::pack;
15pub use binary_unpacker::unpack;
16pub use compression::SchemaCompressionAlgo;
17pub use types::SchemaError;
18
19/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
20///
21/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
22/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
23/// alphabet of box-drawing and geometric shapes.
24///
25/// # Arguments
26///
27/// * `json` - JSON string to encode (must be object or array of objects)
28/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
29///
30/// # Returns
31///
32/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
33///
34/// # Errors
35///
36/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
37/// * `SchemaError::Compression` - Compression failure
38///
39/// # Example
40///
41/// ```ignore
42/// use base_d::{encode_schema, SchemaCompressionAlgo};
43///
44/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
45///
46/// // Without compression
47/// let encoded = encode_schema(json, None)?;
48/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
49///
50/// // With brotli compression
51/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
52/// ```
53///
54/// # See Also
55///
56/// * [`decode_schema`] - Decode schema format back to JSON
57/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
58pub fn encode_schema(
59    json: &str,
60    compress: Option<SchemaCompressionAlgo>,
61) -> Result<String, SchemaError> {
62    use parsers::{InputParser, JsonParser};
63
64    let ir = JsonParser::parse(json)?;
65    let binary = pack(&ir);
66    let compressed = compression::compress_with_prefix(&binary, compress)?;
67    Ok(frame::encode_framed(&compressed))
68}
69
70/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
71///
72/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
73/// display-safe wire format. Automatically detects and handles compression.
74///
75/// # Arguments
76///
77/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
78/// * `pretty` - Pretty-print JSON output with indentation
79///
80/// # Returns
81///
82/// Returns the decoded JSON string (minified or pretty-printed)
83///
84/// # Errors
85///
86/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
87/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
88/// * `SchemaError::Decompression` - Decompression failure
89/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
90/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
91///
92/// # Example
93///
94/// ```ignore
95/// use base_d::decode_schema;
96///
97/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
98///
99/// // Minified output
100/// let json = decode_schema(encoded, false)?;
101/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
102///
103/// // Pretty-printed output
104/// let pretty = decode_schema(encoded, true)?;
105/// println!("{}", pretty);
106/// // {
107/// //   "users": [
108/// //     {"id": 1, "name": "alice"}
109/// //   ]
110/// // }
111/// ```
112///
113/// # See Also
114///
115/// * [`encode_schema`] - Encode JSON to schema format
116/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
117pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
118    use serializers::{JsonSerializer, OutputSerializer};
119
120    let compressed = frame::decode_framed(encoded)?;
121    let binary = compression::decompress_with_prefix(&compressed)?;
122    let ir = unpack(&binary)?;
123    JsonSerializer::serialize(&ir, pretty)
124}
125
#[cfg(test)]
mod integration_tests {
    //! End-to-end tests for the schema pipeline: IR ↔ binary round trips, JSON ↔ binary round
    //! trips, and the public `encode_schema` / `decode_schema` entry points.

    use super::*;
    use crate::encoders::algorithms::schema::types::{
        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
        SchemaHeader, SchemaValue,
    };
    use parsers::{InputParser, JsonParser};
    use serializers::{JsonSerializer, OutputSerializer};

    /// The compression algorithms exercised by the compression tests.
    const ALL_ALGOS: [SchemaCompressionAlgo; 3] = [
        SchemaCompressionAlgo::Brotli,
        SchemaCompressionAlgo::Lz4,
        SchemaCompressionAlgo::Zstd,
    ];

    /// Packs `original`, unpacks the resulting bytes and asserts that the IR is unchanged.
    fn assert_pack_roundtrip(original: &IntermediateRepresentation) {
        let packed = pack(original);
        let unpacked = unpack(&packed).unwrap();
        assert_eq!(original, &unpacked);
    }

    /// Asserts that two JSON texts describe the same value.
    ///
    /// Both sides are parsed into `serde_json::Value` first, so differences in key order or
    /// whitespace do not cause a failure.
    fn assert_json_eq(expected: &str, actual: &str) {
        let expected_value: serde_json::Value = serde_json::from_str(expected).unwrap();
        let actual_value: serde_json::Value = serde_json::from_str(actual).unwrap();
        assert_eq!(expected_value, actual_value);
    }

    /// Sends an IR through pack → unpack and serializes the result as minified JSON.
    fn json_after_pack_unpack(ir: &IntermediateRepresentation) -> String {
        let binary = pack(ir);
        let restored = unpack(&binary).unwrap();
        JsonSerializer::serialize(&restored, false).unwrap()
    }

    /// Parses `input`, sends it through pack → unpack and asserts the JSON value survived.
    fn assert_json_binary_roundtrip(input: &str) {
        let ir = JsonParser::parse(input).unwrap();
        assert_json_eq(input, &json_after_pack_unpack(&ir));
    }

    /// Encodes `input` without compression, decodes it again and asserts the JSON value survived.
    fn assert_framed_roundtrip(input: &str) {
        let encoded = encode_schema(input, None).unwrap();
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_json_eq(input, &decoded);
    }

    #[test]
    fn test_round_trip_simple() {
        let fields = vec![
            FieldDef::new("id", FieldType::U64),
            FieldDef::new("name", FieldType::String),
        ];
        let header = SchemaHeader::new(2, fields);

        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::String("Alice".to_string()),
            SchemaValue::U64(2),
            SchemaValue::String("Bob".to_string()),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_all_types() {
        let fields = vec![
            FieldDef::new("u64_field", FieldType::U64),
            FieldDef::new("i64_field", FieldType::I64),
            FieldDef::new("f64_field", FieldType::F64),
            FieldDef::new("string_field", FieldType::String),
            FieldDef::new("bool_field", FieldType::Bool),
        ];
        let header = SchemaHeader::new(1, fields);

        let values = vec![
            SchemaValue::U64(42),
            SchemaValue::I64(-42),
            SchemaValue::F64(std::f64::consts::PI),
            SchemaValue::String("test".to_string()),
            SchemaValue::Bool(true),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_with_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);

        let values = vec![SchemaValue::U64(42)];
        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_with_nulls() {
        let mut header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );

        // Mark second value as null (row 0, field 1)
        let total_values: usize = 2 * 2; // 2 rows * 2 fields = 4 values
        let bitmap_bytes = total_values.div_ceil(8); // 1 byte
        let mut null_bitmap = vec![0u8; bitmap_bytes];
        null_bitmap[0] |= 1 << 1; // Set bit 1 (second value)

        header.null_bitmap = Some(null_bitmap);
        header.set_flag(FLAG_HAS_NULLS);

        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::Null, // This is marked as null in bitmap
            SchemaValue::U64(2),
            SchemaValue::String("Bob".to_string()),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_array() {
        let fields = vec![FieldDef::new(
            "tags",
            FieldType::Array(Box::new(FieldType::U64)),
        )];
        let header = SchemaHeader::new(1, fields);

        let values = vec![SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ])];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_large_values() {
        let fields = vec![
            FieldDef::new("large_u64", FieldType::U64),
            FieldDef::new("large_i64", FieldType::I64),
        ];
        let header = SchemaHeader::new(1, fields);

        let values = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_empty_string() {
        let fields = vec![FieldDef::new("name", FieldType::String)];
        let header = SchemaHeader::new(1, fields);

        let values = vec![SchemaValue::String("".to_string())];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_round_trip_multiple_rows() {
        let fields = vec![
            FieldDef::new("id", FieldType::U64),
            FieldDef::new("score", FieldType::F64),
            FieldDef::new("active", FieldType::Bool),
        ];
        let header = SchemaHeader::new(3, fields);

        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::F64(95.5),
            SchemaValue::Bool(true),
            SchemaValue::U64(2),
            SchemaValue::F64(87.3),
            SchemaValue::Bool(false),
            SchemaValue::U64(3),
            SchemaValue::F64(92.1),
            SchemaValue::Bool(true),
        ];

        let original = IntermediateRepresentation::new(header, values).unwrap();
        assert_pack_roundtrip(&original);
    }

    #[test]
    fn test_invalid_data() {
        // Empty data
        let result = unpack(&[]);
        assert!(matches!(
            result,
            Err(SchemaError::UnexpectedEndOfData { .. })
        ));

        // Truncated data
        let result = unpack(&[0, 1, 2]);
        assert!(result.is_err());
    }

    #[test]
    fn test_json_full_roundtrip() {
        // Same as the binary round trip, but also passes through the (no-op) compression layer.
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let ir = JsonParser::parse(input).unwrap();
        let binary = pack(&ir);
        let compressed = compression::compress_with_prefix(&binary, None).unwrap();
        let decompressed = compression::decompress_with_prefix(&compressed).unwrap();
        let ir2 = unpack(&decompressed).unwrap();
        let output = JsonSerializer::serialize(&ir2, false).unwrap();

        assert_json_eq(input, &output);
    }

    #[test]
    fn test_json_simple_object() {
        assert_json_binary_roundtrip(r#"{"id":1,"name":"alice","score":95.5}"#);
    }

    #[test]
    fn test_json_nested_objects() {
        assert_json_binary_roundtrip(r#"{"user":{"profile":{"name":"alice","age":30}}}"#);
    }

    #[test]
    fn test_json_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;
        let ir = JsonParser::parse(input).unwrap();
        // A JSON null must be reflected in the header flags before packing.
        assert!(ir.header.has_flag(FLAG_HAS_NULLS));

        assert_json_eq(input, &json_after_pack_unpack(&ir));
    }

    #[test]
    fn test_json_with_arrays() {
        assert_json_binary_roundtrip(r#"{"scores":[95,87,92],"tags":["rust","json"]}"#);
    }

    #[test]
    fn test_encode_schema_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        // Validate frame delimiters
        assert!(encoded.starts_with(frame::FRAME_START));
        assert!(encoded.ends_with(frame::FRAME_END));

        // Decode back to JSON and compare as JSON values (order-independent)
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_json_eq(input, &decoded);
    }

    #[test]
    fn test_encode_schema_simple() {
        assert_framed_roundtrip(r#"{"id":1,"name":"alice","score":95.5}"#);
    }

    #[test]
    fn test_encode_schema_with_nulls() {
        assert_framed_roundtrip(r#"{"name":"alice","age":null,"active":true}"#);
    }

    #[test]
    fn test_encode_schema_empty_object() {
        let input = r#"{}"#;
        let result = encode_schema(input, None);
        // Whether an empty object is accepted or rejected depends on JsonParser behavior, so
        // both outcomes are allowed. Either way the call must not panic.
        println!("Empty object result: {:?}", result);

        // If encoding succeeded, the output must still be a well-formed frame.
        if let Ok(encoded) = &result {
            assert!(encoded.starts_with(frame::FRAME_START));
            assert!(encoded.ends_with(frame::FRAME_END));
        }
    }

    #[test]
    fn test_decode_schema_invalid_frame() {
        let invalid = "not_framed_data";
        let result = decode_schema(invalid, false);
        assert!(matches!(result, Err(SchemaError::InvalidFrame(_))));
    }

    #[test]
    fn test_decode_schema_invalid_chars() {
        // Correct delimiters, but ASCII letters are not part of the display96 alphabet.
        let invalid = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
        let result = decode_schema(&invalid, false);
        assert!(matches!(result, Err(SchemaError::InvalidCharacter(_))));
    }

    #[test]
    fn test_visual_wire_format() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        println!("\n=== Visual Wire Format ===");
        println!("Input JSON: {}", input);
        println!("Input length: {} bytes", input.len());
        println!("\nEncoded output: {}", encoded);
        println!(
            "Encoded length: {} chars ({} bytes UTF-8)",
            encoded.chars().count(),
            encoded.len()
        );

        // Calculate compression ratio (input bytes vs. encoded UTF-8 bytes)
        let compression_ratio = input.len() as f64 / encoded.len() as f64;
        println!("Compression ratio: {:.2}x", compression_ratio);

        // Decode and verify
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_json_eq(input, &decoded);
        println!("Roundtrip verified ✓\n");
    }

    #[test]
    fn test_compression_comparison() {
        let test_cases = [
            r#"{"id":1}"#,
            r#"{"id":1,"name":"alice"}"#,
            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
        ];

        // Reporting test: prints size ratios; it fails only if encoding itself fails.
        println!("\n=== Compression Comparison ===");
        for (i, input) in test_cases.iter().enumerate() {
            let encoded = encode_schema(input, None).unwrap();
            let ratio = input.len() as f64 / encoded.len() as f64;

            println!(
                "Test case {}: {} bytes → {} bytes ({:.2}x)",
                i + 1,
                input.len(),
                encoded.len(),
                ratio
            );
        }
        println!();
    }

    #[test]
    fn test_encode_schema_with_compression() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;
        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();

        // Test each compression algorithm
        for algo in ALL_ALGOS {
            let encoded = encode_schema(input, Some(algo)).unwrap();
            let decoded = decode_schema(&encoded, false).unwrap();

            let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
            assert_eq!(
                input_value, output_value,
                "Failed for compression algorithm: {:?}",
                algo
            );
        }
    }

    #[test]
    fn test_compression_size_comparison() {
        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;

        // Reporting test: prints sizes per algorithm; it fails only if encoding itself fails.
        println!("\n=== Compression Size Comparison ===");
        println!("Input JSON: {} bytes", input.len());

        let no_compress = encode_schema(input, None).unwrap();
        println!("No compression: {} bytes", no_compress.len());

        for algo in ALL_ALGOS {
            let compressed = encode_schema(input, Some(algo)).unwrap();
            let ratio = no_compress.len() as f64 / compressed.len() as f64;
            println!(
                "{:?}: {} bytes ({:.2}x vs uncompressed)",
                algo,
                compressed.len(),
                ratio
            );
        }
        println!();
    }
}