base_d/encoders/algorithms/schema/
mod.rs

1pub mod binary_packer;
2pub mod binary_unpacker;
3pub mod compression;
4pub mod display96;
5pub mod frame;
6pub mod parsers;
7pub mod serializers;
8pub mod types;
9
10#[cfg(test)]
11mod edge_cases;
12
13// Re-export key types for convenience
14pub use binary_packer::pack;
15pub use binary_unpacker::unpack;
16pub use compression::SchemaCompressionAlgo;
17pub use frame::{decode_framed, encode_framed};
18pub use parsers::{InputParser, JsonParser};
19pub use serializers::{JsonSerializer, OutputSerializer};
20pub use types::{
21    FieldDef, FieldType, IntermediateRepresentation, SchemaError, SchemaHeader, SchemaValue,
22};
23
24/// Encode JSON to schema format: JSON → IR → binary → \[compress\] → display96 → framed
25///
26/// Transforms JSON into a compact, display-safe wire format suitable for LLM-to-LLM communication.
27/// The output is wrapped in Egyptian hieroglyph delimiters (`𓍹...𓍺`) and uses a 96-character
28/// alphabet of box-drawing and geometric shapes.
29///
30/// # Arguments
31///
32/// * `json` - JSON string to encode (must be object or array of objects)
33/// * `compress` - Optional compression algorithm (brotli, lz4, or zstd)
34///
35/// # Returns
36///
37/// Returns a framed, display-safe string like `𓍹{encoded_payload}𓍺`
38///
39/// # Errors
40///
41/// * `SchemaError::InvalidInput` - Invalid JSON or unsupported structure (e.g., root primitives)
42/// * `SchemaError::Compression` - Compression failure
43///
44/// # Example
45///
46/// ```ignore
47/// use base_d::{encode_schema, SchemaCompressionAlgo};
48///
49/// let json = r#"{"users":[{"id":1,"name":"alice"}]}"#;
50///
51/// // Without compression
52/// let encoded = encode_schema(json, None)?;
53/// println!("{}", encoded); // 𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺
54///
55/// // With brotli compression
56/// let compressed = encode_schema(json, Some(SchemaCompressionAlgo::Brotli))?;
57/// ```
58///
59/// # See Also
60///
61/// * [`decode_schema`] - Decode schema format back to JSON
62/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
63pub fn encode_schema(
64    json: &str,
65    compress: Option<SchemaCompressionAlgo>,
66) -> Result<String, SchemaError> {
67    use parsers::{InputParser, JsonParser};
68
69    let ir = JsonParser::parse(json)?;
70    let binary = pack(&ir);
71    let compressed = compression::compress_with_prefix(&binary, compress)?;
72    Ok(frame::encode_framed(&compressed))
73}
74
75/// Decode schema format to JSON: framed → display96 → \[decompress\] → binary → IR → JSON
76///
77/// Reverses the schema encoding pipeline to reconstruct the original JSON from the framed,
78/// display-safe wire format. Automatically detects and handles compression.
79///
80/// # Arguments
81///
82/// * `encoded` - Schema-encoded string with delimiters (`𓍹...𓍺`)
83/// * `pretty` - Pretty-print JSON output with indentation
84///
85/// # Returns
86///
87/// Returns the decoded JSON string (minified or pretty-printed)
88///
89/// # Errors
90///
91/// * `SchemaError::InvalidFrame` - Missing or invalid frame delimiters
92/// * `SchemaError::InvalidCharacter` - Invalid character in display96 payload
93/// * `SchemaError::Decompression` - Decompression failure
94/// * `SchemaError::UnexpectedEndOfData` - Truncated or corrupted binary data
95/// * `SchemaError::InvalidTypeTag` - Invalid type tag in header
96///
97/// # Example
98///
99/// ```ignore
100/// use base_d::decode_schema;
101///
102/// let encoded = "𓍹╣◟╥◕◝▰◣◥▟╺▖◘▰◝▤◀╧𓍺";
103///
104/// // Minified output
105/// let json = decode_schema(encoded, false)?;
106/// println!("{}", json); // {"users":[{"id":1,"name":"alice"}]}
107///
108/// // Pretty-printed output
109/// let pretty = decode_schema(encoded, true)?;
110/// println!("{}", pretty);
111/// // {
112/// //   "users": [
113/// //     {"id": 1, "name": "alice"}
114/// //   ]
115/// // }
116/// ```
117///
118/// # See Also
119///
120/// * [`encode_schema`] - Encode JSON to schema format
121/// * [SCHEMA.md](../../../SCHEMA.md) - Full format specification
122pub fn decode_schema(encoded: &str, pretty: bool) -> Result<String, SchemaError> {
123    use serializers::{JsonSerializer, OutputSerializer};
124
125    let compressed = frame::decode_framed(encoded)?;
126    let binary = compression::decompress_with_prefix(&compressed)?;
127    let ir = unpack(&binary)?;
128    JsonSerializer::serialize(&ir, pretty)
129}
130
#[cfg(test)]
mod integration_tests {
    use super::*;
    use crate::encoders::algorithms::schema::types::{
        FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldDef, FieldType, IntermediateRepresentation,
        SchemaHeader, SchemaValue,
    };
    use parsers::{InputParser, JsonParser};
    use serializers::{JsonSerializer, OutputSerializer};

    /// The compression algorithms exercised by the compression tests.
    const ALL_ALGOS: [SchemaCompressionAlgo; 3] = [
        SchemaCompressionAlgo::Brotli,
        SchemaCompressionAlgo::Lz4,
        SchemaCompressionAlgo::Zstd,
    ];

    /// Builds an IR from `header` and `values`, packs it, unpacks the bytes again, and asserts
    /// that the unpacked IR equals the one that was packed.
    #[track_caller]
    fn assert_pack_unpack_identity(header: SchemaHeader, values: Vec<SchemaValue>) {
        let original = IntermediateRepresentation::new(header, values).unwrap();
        let packed = pack(&original);
        let restored = unpack(&packed).unwrap();
        assert_eq!(original, restored);
    }

    /// Asserts that two JSON texts are equal when compared as parsed `serde_json::Value`s
    /// (rather than as raw strings).
    #[track_caller]
    fn assert_same_json(input: &str, output: &str) {
        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();
        let output_value: serde_json::Value = serde_json::from_str(output).unwrap();
        assert_eq!(input_value, output_value);
    }

    /// Runs `input` through parse → pack → unpack → serialize (minified) and returns the JSON.
    #[track_caller]
    fn json_via_binary(input: &str) -> String {
        let parsed = JsonParser::parse(input).unwrap();
        let packed = pack(&parsed);
        let restored = unpack(&packed).unwrap();
        JsonSerializer::serialize(&restored, false).unwrap()
    }

    /// Runs `input` through `encode_schema` and `decode_schema` (minified) and returns the JSON.
    #[track_caller]
    fn json_via_schema(input: &str, algo: Option<SchemaCompressionAlgo>) -> String {
        let encoded = encode_schema(input, algo).unwrap();
        decode_schema(&encoded, false).unwrap()
    }

    #[test]
    fn test_round_trip_simple() {
        let header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );

        // Two rows of (id, name).
        let rows = vec![
            SchemaValue::U64(1),
            SchemaValue::String(String::from("Alice")),
            SchemaValue::U64(2),
            SchemaValue::String(String::from("Bob")),
        ];

        assert_pack_unpack_identity(header, rows);
    }

    #[test]
    fn test_round_trip_all_types() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("u64_field", FieldType::U64),
                FieldDef::new("i64_field", FieldType::I64),
                FieldDef::new("f64_field", FieldType::F64),
                FieldDef::new("string_field", FieldType::String),
                FieldDef::new("bool_field", FieldType::Bool),
            ],
        );

        let row = vec![
            SchemaValue::U64(42),
            SchemaValue::I64(-42),
            SchemaValue::F64(std::f64::consts::PI),
            SchemaValue::String(String::from("test")),
            SchemaValue::Bool(true),
        ];

        assert_pack_unpack_identity(header, row);
    }

    #[test]
    fn test_round_trip_with_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some(String::from("users"));
        header.set_flag(FLAG_HAS_ROOT_KEY);

        assert_pack_unpack_identity(header, vec![SchemaValue::U64(42)]);
    }

    #[test]
    fn test_round_trip_with_nulls() {
        let mut header = SchemaHeader::new(
            2,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );

        // 2 rows * 2 fields = 4 values, which fit in a single bitmap byte.
        let value_count: usize = 2 * 2;
        let mut bitmap = vec![0u8; value_count.div_ceil(8)];
        // Bit 1 marks the second value (row 0, field 1) as null.
        bitmap[0] |= 0b0000_0010;

        header.null_bitmap = Some(bitmap);
        header.set_flag(FLAG_HAS_NULLS);

        let rows = vec![
            SchemaValue::U64(1),
            SchemaValue::Null, // The value flagged in the bitmap above
            SchemaValue::U64(2),
            SchemaValue::String(String::from("Bob")),
        ];

        assert_pack_unpack_identity(header, rows);
    }

    #[test]
    fn test_round_trip_array() {
        let element_type = Box::new(FieldType::U64);
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new("tags", FieldType::Array(element_type))],
        );

        let elements = vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ];

        assert_pack_unpack_identity(header, vec![SchemaValue::Array(elements)]);
    }

    #[test]
    fn test_round_trip_large_values() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("large_u64", FieldType::U64),
                FieldDef::new("large_i64", FieldType::I64),
            ],
        );

        // Extremes of both integer ranges.
        let row = vec![SchemaValue::U64(u64::MAX), SchemaValue::I64(i64::MIN)];

        assert_pack_unpack_identity(header, row);
    }

    #[test]
    fn test_round_trip_empty_string() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("name", FieldType::String)]);

        assert_pack_unpack_identity(header, vec![SchemaValue::String(String::new())]);
    }

    #[test]
    fn test_round_trip_multiple_rows() {
        let header = SchemaHeader::new(
            3,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("score", FieldType::F64),
                FieldDef::new("active", FieldType::Bool),
            ],
        );

        // Three rows of (id, score, active), stored row after row.
        let rows = vec![
            SchemaValue::U64(1),
            SchemaValue::F64(95.5),
            SchemaValue::Bool(true),
            SchemaValue::U64(2),
            SchemaValue::F64(87.3),
            SchemaValue::Bool(false),
            SchemaValue::U64(3),
            SchemaValue::F64(92.1),
            SchemaValue::Bool(true),
        ];

        assert_pack_unpack_identity(header, rows);
    }

    #[test]
    fn test_invalid_data() {
        // No bytes at all must be reported as running out of data.
        let empty = unpack(&[]);
        assert!(matches!(
            empty,
            Err(SchemaError::UnexpectedEndOfData { .. })
        ));

        // A few arbitrary bytes must be rejected (any error variant is accepted here).
        let truncated = unpack(&[0, 1, 2]);
        assert!(truncated.is_err());
    }

    #[test]
    fn test_json_full_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;

        // Forward: JSON → IR → binary → prefixed payload (no compression).
        let parsed = JsonParser::parse(input).unwrap();
        let packed = pack(&parsed);
        let payload = compression::compress_with_prefix(&packed, None).unwrap();

        // Backward: prefixed payload → binary → IR → JSON.
        let unwrapped = compression::decompress_with_prefix(&payload).unwrap();
        let restored = unpack(&unwrapped).unwrap();
        let output = JsonSerializer::serialize(&restored, false).unwrap();

        assert_same_json(input, &output);
    }

    #[test]
    fn test_json_simple_object() {
        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
        assert_same_json(input, &json_via_binary(input));
    }

    #[test]
    fn test_json_nested_objects() {
        let input = r#"{"user":{"profile":{"name":"alice","age":30}}}"#;
        assert_same_json(input, &json_via_binary(input));
    }

    #[test]
    fn test_json_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;

        // The parser must flag the header when the input contains a null.
        let parsed = JsonParser::parse(input).unwrap();
        assert!(parsed.header.has_flag(FLAG_HAS_NULLS));

        let packed = pack(&parsed);
        let restored = unpack(&packed).unwrap();
        let output = JsonSerializer::serialize(&restored, false).unwrap();

        assert_same_json(input, &output);
    }

    #[test]
    fn test_json_with_arrays() {
        let input = r#"{"scores":[95,87,92],"tags":["rust","json"]}"#;
        assert_same_json(input, &json_via_binary(input));
    }

    #[test]
    fn test_encode_schema_roundtrip() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        // The output must be wrapped in the frame delimiters.
        assert!(encoded.starts_with(frame::FRAME_START));
        assert!(encoded.ends_with(frame::FRAME_END));

        // Decoding must give back the same JSON document.
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_same_json(input, &decoded);
    }

    #[test]
    fn test_encode_schema_simple() {
        let input = r#"{"id":1,"name":"alice","score":95.5}"#;
        assert_same_json(input, &json_via_schema(input, None));
    }

    #[test]
    fn test_encode_schema_with_nulls() {
        let input = r#"{"name":"alice","age":null,"active":true}"#;
        assert_same_json(input, &json_via_schema(input, None));
    }

    #[test]
    fn test_encode_schema_empty_object() {
        let input = r#"{}"#;
        let result = encode_schema(input, None);
        // Deliberately no assertion: whether an empty object is rejected or encoded depends on
        // JsonParser behavior. This only prints the outcome and checks that nothing panics.
        println!("Empty object result: {:?}", result);
    }

    #[test]
    fn test_decode_schema_invalid_frame() {
        // Input without frame delimiters.
        let outcome = decode_schema("not_framed_data", false);
        assert!(matches!(outcome, Err(SchemaError::InvalidFrame(_))));
    }

    #[test]
    fn test_decode_schema_invalid_chars() {
        // Correctly framed, but "ABC" is expected to be rejected as display96 payload.
        let framed = format!("{}ABC{}", frame::FRAME_START, frame::FRAME_END);
        let outcome = decode_schema(&framed, false);
        assert!(matches!(outcome, Err(SchemaError::InvalidCharacter(_))));
    }

    #[test]
    fn test_visual_wire_format() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#;
        let encoded = encode_schema(input, None).unwrap();

        let input_bytes = input.len();
        let encoded_bytes = encoded.len();
        let encoded_chars = encoded.chars().count();

        println!("\n=== Visual Wire Format ===");
        println!("Input JSON: {}", input);
        println!("Input length: {} bytes", input_bytes);
        println!("\nEncoded output: {}", encoded);
        println!(
            "Encoded length: {} chars ({} bytes UTF-8)",
            encoded_chars, encoded_bytes
        );

        // Ratio of input bytes to encoded UTF-8 bytes.
        let compression_ratio = input_bytes as f64 / encoded_bytes as f64;
        println!("Compression ratio: {:.2}x", compression_ratio);

        // The encoded text must still decode to the same document.
        let decoded = decode_schema(&encoded, false).unwrap();
        assert_same_json(input, &decoded);
        println!("Roundtrip verified ✓\n");
    }

    #[test]
    fn test_compression_comparison() {
        let test_cases = [
            r#"{"id":1}"#,
            r#"{"id":1,"name":"alice"}"#,
            r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]}"#,
            r#"{"data":[1,2,3,4,5,6,7,8,9,10]}"#,
        ];

        println!("\n=== Compression Comparison ===");
        // Case numbers are reported 1-based.
        for (case_no, input) in (1..).zip(test_cases) {
            let encoded = encode_schema(input, None).unwrap();
            let ratio = input.len() as f64 / encoded.len() as f64;

            println!(
                "Test case {}: {} bytes → {} bytes ({:.2}x)",
                case_no,
                input.len(),
                encoded.len(),
                ratio
            );
        }
        println!();
    }

    #[test]
    fn test_encode_schema_with_compression() {
        let input = r#"{"users":[{"id":1,"name":"alice"},{"id":2,"name":"bob"},{"id":3,"name":"charlie"}]}"#;
        let input_value: serde_json::Value = serde_json::from_str(input).unwrap();

        // Every algorithm must round-trip to the same JSON document.
        for algo in ALL_ALGOS {
            let decoded = json_via_schema(input, Some(algo));
            let output_value: serde_json::Value = serde_json::from_str(&decoded).unwrap();
            assert_eq!(
                input_value, output_value,
                "Failed for compression algorithm: {:?}",
                algo
            );
        }
    }

    #[test]
    fn test_compression_size_comparison() {
        let input = r#"{"users":[{"id":1,"name":"alice","active":true,"score":95.5},{"id":2,"name":"bob","active":false,"score":87.3},{"id":3,"name":"charlie","active":true,"score":92.1}]}"#;

        println!("\n=== Compression Size Comparison ===");
        println!("Input JSON: {} bytes", input.len());

        // Baseline: encoded size without any compression.
        let baseline_len = encode_schema(input, None).unwrap().len();
        println!("No compression: {} bytes", baseline_len);

        for algo in ALL_ALGOS {
            let compressed_len = encode_schema(input, Some(algo)).unwrap().len();
            let ratio = baseline_len as f64 / compressed_len as f64;
            println!(
                "{:?}: {} bytes ({:.2}x vs uncompressed)",
                algo, compressed_len, ratio
            );
        }
        println!();
    }
}