Skip to main content

embeddenator_interop/
formats.rs

1//! Format conversion utilities for embeddenator types.
2//!
3//! This module provides conversions between embeddenator native types
4//! and various external formats:
5//! - JSON (human-readable, cross-language)
6//! - Bincode (binary, efficient)
7//! - Text representations (debugging, CLI output)
8//!
9//! All conversions support round-trip guarantees where applicable.
10
11use embeddenator_fs::{Engram, Manifest, SubEngram};
12use embeddenator_vsa::{ReversibleVSAConfig, SparseVec};
13use std::io;
14
/// Encodings understood by the `*_to_format` / `*_from_format` functions in this module.
///
/// The enum is a plain `Copy` tag; it also derives `Hash` so it can be used
/// as a key in hashed collections (for example a per-format lookup table).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OutputFormat {
    /// Compact JSON, produced with `serde_json::to_vec`.
    Json,
    /// Pretty-printed JSON, produced with `serde_json::to_vec_pretty`.
    /// Decoding treats it exactly like [`OutputFormat::Json`].
    JsonPretty,
    /// Bincode binary encoding.
    Bincode,
    /// Short textual summary intended for debugging output.
    /// This format is write-only: the `*_from_format` functions reject it.
    Text,
}
27
/// Error returned by the format conversion functions in this module.
///
/// Every variant carries a human-readable message. Because the payloads are
/// plain strings, the type is comparable (`PartialEq`/`Eq`), which lets
/// callers and tests use `assert_eq!` on returned errors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatError {
    /// Encoding a value into the requested format failed.
    SerializationFailed(String),
    /// Decoding bytes in the requested format failed.
    DeserializationFailed(String),
    /// The requested format cannot be used for this operation
    /// (for example decoding from the text format).
    UnsupportedFormat(String),
    /// An I/O error, stored as its rendered message.
    IoError(String),
}
40
41impl std::fmt::Display for FormatError {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        match self {
44            FormatError::SerializationFailed(msg) => write!(f, "serialization failed: {}", msg),
45            FormatError::DeserializationFailed(msg) => {
46                write!(f, "deserialization failed: {}", msg)
47            }
48            FormatError::UnsupportedFormat(msg) => write!(f, "unsupported format: {}", msg),
49            FormatError::IoError(msg) => write!(f, "I/O error: {}", msg),
50        }
51    }
52}
53
// Marker impl: every `FormatError` variant carries only a message string, so
// the trait's default methods are used unchanged (no underlying `source()`).
impl std::error::Error for FormatError {}
55
56impl From<io::Error> for FormatError {
57    fn from(err: io::Error) -> Self {
58        FormatError::IoError(err.to_string())
59    }
60}
61
62// ============================================================================
63// SparseVec Conversions
64// ============================================================================
65
66/// Convert SparseVec to specified format
67pub fn sparse_vec_to_format(vec: &SparseVec, format: OutputFormat) -> Result<Vec<u8>, FormatError> {
68    match format {
69        OutputFormat::Json => {
70            serde_json::to_vec(vec).map_err(|e| FormatError::SerializationFailed(e.to_string()))
71        }
72        OutputFormat::JsonPretty => serde_json::to_vec_pretty(vec)
73            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
74        OutputFormat::Bincode => {
75            bincode::serialize(vec).map_err(|e| FormatError::SerializationFailed(e.to_string()))
76        }
77        OutputFormat::Text => Ok(format_sparse_vec_text(vec).into_bytes()),
78    }
79}
80
81/// Convert from bytes to SparseVec
82pub fn sparse_vec_from_format(data: &[u8], format: OutputFormat) -> Result<SparseVec, FormatError> {
83    match format {
84        OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
85            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
86        OutputFormat::Bincode => bincode::deserialize(data)
87            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
88        OutputFormat::Text => Err(FormatError::UnsupportedFormat(
89            "cannot deserialize from text format".to_string(),
90        )),
91    }
92}
93
94fn format_sparse_vec_text(vec: &SparseVec) -> String {
95    format!(
96        "SparseVec {{ pos: {:?}, neg: {:?}, nnz: {} }}",
97        &vec.pos[..vec.pos.len().min(10)],
98        &vec.neg[..vec.neg.len().min(10)],
99        vec.pos.len() + vec.neg.len()
100    )
101}
102
103// ============================================================================
104// Engram Conversions
105// ============================================================================
106
107/// Convert Engram to specified format
108pub fn engram_to_format(engram: &Engram, format: OutputFormat) -> Result<Vec<u8>, FormatError> {
109    match format {
110        OutputFormat::Json => {
111            serde_json::to_vec(engram).map_err(|e| FormatError::SerializationFailed(e.to_string()))
112        }
113        OutputFormat::JsonPretty => serde_json::to_vec_pretty(engram)
114            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
115        OutputFormat::Bincode => {
116            bincode::serialize(engram).map_err(|e| FormatError::SerializationFailed(e.to_string()))
117        }
118        OutputFormat::Text => Ok(format_engram_text(engram).into_bytes()),
119    }
120}
121
122/// Convert from bytes to Engram
123pub fn engram_from_format(data: &[u8], format: OutputFormat) -> Result<Engram, FormatError> {
124    match format {
125        OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
126            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
127        OutputFormat::Bincode => bincode::deserialize(data)
128            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
129        OutputFormat::Text => Err(FormatError::UnsupportedFormat(
130            "cannot deserialize from text format".to_string(),
131        )),
132    }
133}
134
135fn format_engram_text(engram: &Engram) -> String {
136    format!(
137        "Engram {{ root_nnz: {}, codebook_size: {} }}",
138        engram.root.pos.len() + engram.root.neg.len(),
139        engram.codebook.len()
140    )
141}
142
143// ============================================================================
144// Manifest Conversions
145// ============================================================================
146
147/// Convert Manifest to specified format
148pub fn manifest_to_format(
149    manifest: &Manifest,
150    format: OutputFormat,
151) -> Result<Vec<u8>, FormatError> {
152    match format {
153        OutputFormat::Json => serde_json::to_vec(manifest)
154            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
155        OutputFormat::JsonPretty => serde_json::to_vec_pretty(manifest)
156            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
157        OutputFormat::Bincode => bincode::serialize(manifest)
158            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
159        OutputFormat::Text => Ok(format_manifest_text(manifest).into_bytes()),
160    }
161}
162
163/// Convert from bytes to Manifest
164pub fn manifest_from_format(data: &[u8], format: OutputFormat) -> Result<Manifest, FormatError> {
165    match format {
166        OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
167            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
168        OutputFormat::Bincode => bincode::deserialize(data)
169            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
170        OutputFormat::Text => Err(FormatError::UnsupportedFormat(
171            "cannot deserialize from text format".to_string(),
172        )),
173    }
174}
175
176fn format_manifest_text(manifest: &Manifest) -> String {
177    format!(
178        "Manifest {{ files: {}, total_chunks: {} }}",
179        manifest.files.len(),
180        manifest.total_chunks
181    )
182}
183
184// ============================================================================
185// SubEngram Conversions
186// ============================================================================
187
188/// Convert SubEngram to specified format
189pub fn sub_engram_to_format(sub: &SubEngram, format: OutputFormat) -> Result<Vec<u8>, FormatError> {
190    match format {
191        OutputFormat::Json => {
192            serde_json::to_vec(sub).map_err(|e| FormatError::SerializationFailed(e.to_string()))
193        }
194        OutputFormat::JsonPretty => serde_json::to_vec_pretty(sub)
195            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
196        OutputFormat::Bincode => {
197            bincode::serialize(sub).map_err(|e| FormatError::SerializationFailed(e.to_string()))
198        }
199        OutputFormat::Text => Ok(format_sub_engram_text(sub).into_bytes()),
200    }
201}
202
203/// Convert from bytes to SubEngram
204pub fn sub_engram_from_format(data: &[u8], format: OutputFormat) -> Result<SubEngram, FormatError> {
205    match format {
206        OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
207            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
208        OutputFormat::Bincode => bincode::deserialize(data)
209            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
210        OutputFormat::Text => Err(FormatError::UnsupportedFormat(
211            "cannot deserialize from text format".to_string(),
212        )),
213    }
214}
215
216fn format_sub_engram_text(sub: &SubEngram) -> String {
217    format!(
218        "SubEngram {{ chunk_ids: {}, children: {} }}",
219        sub.chunk_ids.len(),
220        sub.children.len()
221    )
222}
223
224// ============================================================================
225// Config Conversions
226// ============================================================================
227
228/// Convert ReversibleVSAConfig to specified format
229pub fn vsa_config_to_format(
230    config: &ReversibleVSAConfig,
231    format: OutputFormat,
232) -> Result<Vec<u8>, FormatError> {
233    match format {
234        OutputFormat::Json => {
235            serde_json::to_vec(config).map_err(|e| FormatError::SerializationFailed(e.to_string()))
236        }
237        OutputFormat::JsonPretty => serde_json::to_vec_pretty(config)
238            .map_err(|e| FormatError::SerializationFailed(e.to_string())),
239        OutputFormat::Bincode => {
240            bincode::serialize(config).map_err(|e| FormatError::SerializationFailed(e.to_string()))
241        }
242        OutputFormat::Text => Ok(format_vsa_config_text(config).into_bytes()),
243    }
244}
245
246/// Convert from bytes to ReversibleVSAConfig
247pub fn vsa_config_from_format(
248    data: &[u8],
249    format: OutputFormat,
250) -> Result<ReversibleVSAConfig, FormatError> {
251    match format {
252        OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
253            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
254        OutputFormat::Bincode => bincode::deserialize(data)
255            .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
256        OutputFormat::Text => Err(FormatError::UnsupportedFormat(
257            "cannot deserialize from text format".to_string(),
258        )),
259    }
260}
261
262fn format_vsa_config_text(config: &ReversibleVSAConfig) -> String {
263    format!(
264        "ReversibleVSAConfig {{ block_size: {}, max_path_depth: {}, base_shift: {}, target_sparsity: {} }}",
265        config.block_size, config.max_path_depth, config.base_shift, config.target_sparsity
266    )
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture shared by the `SparseVec` tests: three positive and three
    /// negative indices.
    fn sample_sparse_vec() -> SparseVec {
        SparseVec {
            pos: vec![1, 5, 9],
            neg: vec![2, 6, 10],
        }
    }

    /// Encode `original` in `format`, decode it again, and check both index lists.
    fn assert_sparse_vec_roundtrip(original: &SparseVec, format: OutputFormat) {
        let bytes = sparse_vec_to_format(original, format).unwrap();
        let decoded = sparse_vec_from_format(&bytes, format).unwrap();

        assert_eq!(original.pos, decoded.pos);
        assert_eq!(original.neg, decoded.neg);
    }

    #[test]
    fn test_sparse_vec_roundtrip_json() {
        assert_sparse_vec_roundtrip(&sample_sparse_vec(), OutputFormat::Json);
    }

    #[test]
    fn test_sparse_vec_roundtrip_json_pretty() {
        assert_sparse_vec_roundtrip(&sample_sparse_vec(), OutputFormat::JsonPretty);
    }

    #[test]
    fn test_sparse_vec_roundtrip_bincode() {
        assert_sparse_vec_roundtrip(&sample_sparse_vec(), OutputFormat::Bincode);
    }

    #[test]
    fn test_sparse_vec_text_format() {
        let vec = sample_sparse_vec();

        let bytes = sparse_vec_to_format(&vec, OutputFormat::Text).unwrap();
        let text = String::from_utf8(bytes).unwrap();

        assert!(text.contains("SparseVec"));
        assert!(text.contains("nnz: 6"));
    }

    #[test]
    fn test_vsa_config_roundtrip() {
        let config = ReversibleVSAConfig {
            block_size: 256,
            max_path_depth: 10,
            base_shift: 1000,
            target_sparsity: 200,
        };

        // Every decodable format must preserve every field, not just a subset.
        let formats = [
            OutputFormat::Json,
            OutputFormat::JsonPretty,
            OutputFormat::Bincode,
        ];
        for &format in &formats {
            let bytes = vsa_config_to_format(&config, format).unwrap();
            let decoded = vsa_config_from_format(&bytes, format).unwrap();

            assert_eq!(config.block_size, decoded.block_size);
            assert_eq!(config.max_path_depth, decoded.max_path_depth);
            assert_eq!(config.base_shift, decoded.base_shift);
            assert_eq!(config.target_sparsity, decoded.target_sparsity);
        }
    }

    #[test]
    fn test_text_format_no_deserialize() {
        let vec = sample_sparse_vec();

        let bytes = sparse_vec_to_format(&vec, OutputFormat::Text).unwrap();
        let result = sparse_vec_from_format(&bytes, OutputFormat::Text);

        // `matches!` already implies the result is an `Err`.
        assert!(matches!(result, Err(FormatError::UnsupportedFormat(_))));
    }
}