1use embeddenator_fs::{Engram, Manifest, SubEngram};
12use embeddenator_vsa::{ReversibleVSAConfig, SparseVec};
13use std::io;
14
/// Encoding selected when converting values to and from bytes.
///
/// `Hash` is derived alongside `PartialEq`/`Eq` so a format can be used as a
/// `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OutputFormat {
    /// JSON produced with `serde_json::to_vec`.
    Json,
    /// JSON produced with `serde_json::to_vec_pretty`.
    JsonPretty,
    /// Binary encoding produced with `bincode::serialize`.
    Bincode,
    /// Human-readable summary; the `*_from_format` functions in this module
    /// reject it, so it is output-only.
    Text,
}
27
/// Failure raised while converting a value to or from an [`OutputFormat`].
///
/// Every variant carries the underlying problem as a plain message string.
/// `PartialEq`/`Eq` are derived so callers and tests can compare errors
/// directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatError {
    /// An encoder failed while turning a value into bytes.
    SerializationFailed(String),
    /// A decoder failed while rebuilding a value from bytes.
    DeserializationFailed(String),
    /// The requested format does not support the attempted operation.
    UnsupportedFormat(String),
    /// An I/O error, stored as its display message.
    IoError(String),
}
40
41impl std::fmt::Display for FormatError {
42 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43 match self {
44 FormatError::SerializationFailed(msg) => write!(f, "serialization failed: {}", msg),
45 FormatError::DeserializationFailed(msg) => {
46 write!(f, "deserialization failed: {}", msg)
47 }
48 FormatError::UnsupportedFormat(msg) => write!(f, "unsupported format: {}", msg),
49 FormatError::IoError(msg) => write!(f, "I/O error: {}", msg),
50 }
51 }
52}
53
// Marker impl: `FormatError` uses the default `std::error::Error` methods.
// No `source()` override is provided; each variant stores only a message string.
impl std::error::Error for FormatError {}
55
56impl From<io::Error> for FormatError {
57 fn from(err: io::Error) -> Self {
58 FormatError::IoError(err.to_string())
59 }
60}
61
62pub fn sparse_vec_to_format(vec: &SparseVec, format: OutputFormat) -> Result<Vec<u8>, FormatError> {
68 match format {
69 OutputFormat::Json => {
70 serde_json::to_vec(vec).map_err(|e| FormatError::SerializationFailed(e.to_string()))
71 }
72 OutputFormat::JsonPretty => serde_json::to_vec_pretty(vec)
73 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
74 OutputFormat::Bincode => {
75 bincode::serialize(vec).map_err(|e| FormatError::SerializationFailed(e.to_string()))
76 }
77 OutputFormat::Text => Ok(format_sparse_vec_text(vec).into_bytes()),
78 }
79}
80
81pub fn sparse_vec_from_format(data: &[u8], format: OutputFormat) -> Result<SparseVec, FormatError> {
83 match format {
84 OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
85 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
86 OutputFormat::Bincode => bincode::deserialize(data)
87 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
88 OutputFormat::Text => Err(FormatError::UnsupportedFormat(
89 "cannot deserialize from text format".to_string(),
90 )),
91 }
92}
93
94fn format_sparse_vec_text(vec: &SparseVec) -> String {
95 format!(
96 "SparseVec {{ pos: {:?}, neg: {:?}, nnz: {} }}",
97 &vec.pos[..vec.pos.len().min(10)],
98 &vec.neg[..vec.neg.len().min(10)],
99 vec.pos.len() + vec.neg.len()
100 )
101}
102
103pub fn engram_to_format(engram: &Engram, format: OutputFormat) -> Result<Vec<u8>, FormatError> {
109 match format {
110 OutputFormat::Json => {
111 serde_json::to_vec(engram).map_err(|e| FormatError::SerializationFailed(e.to_string()))
112 }
113 OutputFormat::JsonPretty => serde_json::to_vec_pretty(engram)
114 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
115 OutputFormat::Bincode => {
116 bincode::serialize(engram).map_err(|e| FormatError::SerializationFailed(e.to_string()))
117 }
118 OutputFormat::Text => Ok(format_engram_text(engram).into_bytes()),
119 }
120}
121
122pub fn engram_from_format(data: &[u8], format: OutputFormat) -> Result<Engram, FormatError> {
124 match format {
125 OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
126 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
127 OutputFormat::Bincode => bincode::deserialize(data)
128 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
129 OutputFormat::Text => Err(FormatError::UnsupportedFormat(
130 "cannot deserialize from text format".to_string(),
131 )),
132 }
133}
134
135fn format_engram_text(engram: &Engram) -> String {
136 format!(
137 "Engram {{ root_nnz: {}, codebook_size: {} }}",
138 engram.root.pos.len() + engram.root.neg.len(),
139 engram.codebook.len()
140 )
141}
142
143pub fn manifest_to_format(
149 manifest: &Manifest,
150 format: OutputFormat,
151) -> Result<Vec<u8>, FormatError> {
152 match format {
153 OutputFormat::Json => serde_json::to_vec(manifest)
154 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
155 OutputFormat::JsonPretty => serde_json::to_vec_pretty(manifest)
156 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
157 OutputFormat::Bincode => bincode::serialize(manifest)
158 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
159 OutputFormat::Text => Ok(format_manifest_text(manifest).into_bytes()),
160 }
161}
162
163pub fn manifest_from_format(data: &[u8], format: OutputFormat) -> Result<Manifest, FormatError> {
165 match format {
166 OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
167 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
168 OutputFormat::Bincode => bincode::deserialize(data)
169 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
170 OutputFormat::Text => Err(FormatError::UnsupportedFormat(
171 "cannot deserialize from text format".to_string(),
172 )),
173 }
174}
175
176fn format_manifest_text(manifest: &Manifest) -> String {
177 format!(
178 "Manifest {{ files: {}, total_chunks: {} }}",
179 manifest.files.len(),
180 manifest.total_chunks
181 )
182}
183
184pub fn sub_engram_to_format(sub: &SubEngram, format: OutputFormat) -> Result<Vec<u8>, FormatError> {
190 match format {
191 OutputFormat::Json => {
192 serde_json::to_vec(sub).map_err(|e| FormatError::SerializationFailed(e.to_string()))
193 }
194 OutputFormat::JsonPretty => serde_json::to_vec_pretty(sub)
195 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
196 OutputFormat::Bincode => {
197 bincode::serialize(sub).map_err(|e| FormatError::SerializationFailed(e.to_string()))
198 }
199 OutputFormat::Text => Ok(format_sub_engram_text(sub).into_bytes()),
200 }
201}
202
203pub fn sub_engram_from_format(data: &[u8], format: OutputFormat) -> Result<SubEngram, FormatError> {
205 match format {
206 OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
207 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
208 OutputFormat::Bincode => bincode::deserialize(data)
209 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
210 OutputFormat::Text => Err(FormatError::UnsupportedFormat(
211 "cannot deserialize from text format".to_string(),
212 )),
213 }
214}
215
216fn format_sub_engram_text(sub: &SubEngram) -> String {
217 format!(
218 "SubEngram {{ chunk_ids: {}, children: {} }}",
219 sub.chunk_ids.len(),
220 sub.children.len()
221 )
222}
223
224pub fn vsa_config_to_format(
230 config: &ReversibleVSAConfig,
231 format: OutputFormat,
232) -> Result<Vec<u8>, FormatError> {
233 match format {
234 OutputFormat::Json => {
235 serde_json::to_vec(config).map_err(|e| FormatError::SerializationFailed(e.to_string()))
236 }
237 OutputFormat::JsonPretty => serde_json::to_vec_pretty(config)
238 .map_err(|e| FormatError::SerializationFailed(e.to_string())),
239 OutputFormat::Bincode => {
240 bincode::serialize(config).map_err(|e| FormatError::SerializationFailed(e.to_string()))
241 }
242 OutputFormat::Text => Ok(format_vsa_config_text(config).into_bytes()),
243 }
244}
245
246pub fn vsa_config_from_format(
248 data: &[u8],
249 format: OutputFormat,
250) -> Result<ReversibleVSAConfig, FormatError> {
251 match format {
252 OutputFormat::Json | OutputFormat::JsonPretty => serde_json::from_slice(data)
253 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
254 OutputFormat::Bincode => bincode::deserialize(data)
255 .map_err(|e| FormatError::DeserializationFailed(e.to_string())),
256 OutputFormat::Text => Err(FormatError::UnsupportedFormat(
257 "cannot deserialize from text format".to_string(),
258 )),
259 }
260}
261
262fn format_vsa_config_text(config: &ReversibleVSAConfig) -> String {
263 format!(
264 "ReversibleVSAConfig {{ block_size: {}, max_path_depth: {}, base_shift: {}, target_sparsity: {} }}",
265 config.block_size, config.max_path_depth, config.base_shift, config.target_sparsity
266 )
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Every format that supports both serialization and deserialization.
    const ROUNDTRIP_FORMATS: [OutputFormat; 3] = [
        OutputFormat::Json,
        OutputFormat::JsonPretty,
        OutputFormat::Bincode,
    ];

    /// Small fixture shared by the sparse-vector tests.
    fn sample_vec() -> SparseVec {
        SparseVec {
            pos: vec![1, 5, 9],
            neg: vec![2, 6, 10],
        }
    }

    /// Encodes and decodes the fixture in `format`, checking both index lists survive.
    fn assert_sparse_vec_roundtrip(format: OutputFormat) {
        let original = sample_vec();

        let bytes = sparse_vec_to_format(&original, format).unwrap();
        let decoded = sparse_vec_from_format(&bytes, format).unwrap();

        assert_eq!(original.pos, decoded.pos, "pos mismatch for {:?}", format);
        assert_eq!(original.neg, decoded.neg, "neg mismatch for {:?}", format);
    }

    #[test]
    fn test_sparse_vec_roundtrip_json() {
        assert_sparse_vec_roundtrip(OutputFormat::Json);
    }

    #[test]
    fn test_sparse_vec_roundtrip_json_pretty() {
        // Pretty output is decoded by the same JSON branch; make sure that holds.
        assert_sparse_vec_roundtrip(OutputFormat::JsonPretty);
    }

    #[test]
    fn test_sparse_vec_roundtrip_bincode() {
        assert_sparse_vec_roundtrip(OutputFormat::Bincode);
    }

    #[test]
    fn test_sparse_vec_text_format() {
        let bytes = sparse_vec_to_format(&sample_vec(), OutputFormat::Text).unwrap();
        let text = String::from_utf8(bytes).unwrap();

        assert!(text.contains("SparseVec"));
        assert!(text.contains("nnz: 6"));
    }

    #[test]
    fn test_vsa_config_roundtrip() {
        let config = ReversibleVSAConfig {
            block_size: 256,
            max_path_depth: 10,
            base_shift: 1000,
            target_sparsity: 200,
        };

        // Check every field in every decodable format so a dropped or
        // misordered field cannot slip through.
        for format in ROUNDTRIP_FORMATS {
            let bytes = vsa_config_to_format(&config, format).unwrap();
            let decoded = vsa_config_from_format(&bytes, format).unwrap();

            assert_eq!(config.block_size, decoded.block_size, "{:?}", format);
            assert_eq!(config.max_path_depth, decoded.max_path_depth, "{:?}", format);
            assert_eq!(config.base_shift, decoded.base_shift, "{:?}", format);
            assert_eq!(config.target_sparsity, decoded.target_sparsity, "{:?}", format);
        }
    }

    #[test]
    fn test_text_format_no_deserialize() {
        let bytes = sparse_vec_to_format(&sample_vec(), OutputFormat::Text).unwrap();
        let result = sparse_vec_from_format(&bytes, OutputFormat::Text);

        assert!(matches!(result, Err(FormatError::UnsupportedFormat(_))));
    }
}