1use embeddenator_fs::{Engram, Manifest};
7use embeddenator_io::{BinaryWriteOptions, CompressionCodec, PayloadKind};
8use embeddenator_vsa::{ReversibleVSAConfig, SparseVec};
9use std::io;
10use std::path::Path;
11
12fn write_json_to_file<P: AsRef<Path>, T: serde::Serialize>(path: P, value: &T) -> io::Result<()> {
14 let json = serde_json::to_string_pretty(value).map_err(io::Error::other)?;
15 std::fs::write(path, json)
16}
17
18fn read_json_from_file<P: AsRef<Path>, T: serde::de::DeserializeOwned>(path: P) -> io::Result<T> {
19 let data = std::fs::read(path)?;
20 serde_json::from_slice(&data).map_err(io::Error::other)
21}
22
23fn write_bincode_to_file<P: AsRef<Path>, T: serde::Serialize>(
24 path: P,
25 value: &T,
26) -> io::Result<()> {
27 let bytes = bincode::serialize(value).map_err(io::Error::other)?;
28 std::fs::write(path, bytes)
29}
30
31fn read_bincode_from_file<P: AsRef<Path>, T: serde::de::DeserializeOwned>(
32 path: P,
33) -> io::Result<T> {
34 let data = std::fs::read(path)?;
35 bincode::deserialize(&data).map_err(io::Error::other)
36}
37
38fn wrap_with_envelope(
39 kind: PayloadKind,
40 opts: BinaryWriteOptions,
41 data: &[u8],
42) -> io::Result<Vec<u8>> {
43 embeddenator_io::wrap_or_legacy(kind, opts, data)
45}
46
47fn unwrap_from_envelope(kind: PayloadKind, data: &[u8]) -> io::Result<Vec<u8>> {
48 embeddenator_io::unwrap_auto(kind, data)
50}
51
52pub struct EnvelopeAdapter;
54
55impl EnvelopeAdapter {
56 pub fn wrap_engram(
58 engram: &Engram,
59 codec: CompressionCodec,
60 level: Option<i32>,
61 ) -> io::Result<Vec<u8>> {
62 let serialized = bincode::serialize(engram).map_err(io::Error::other)?;
63 let opts = BinaryWriteOptions { codec, level };
64 wrap_with_envelope(PayloadKind::EngramBincode, opts, &serialized)
65 }
66
67 pub fn unwrap_engram(data: &[u8]) -> io::Result<Engram> {
69 let decoded = unwrap_from_envelope(PayloadKind::EngramBincode, data)?;
70 bincode::deserialize(&decoded).map_err(io::Error::other)
71 }
72
73 pub fn wrap_sub_engram(
75 sub: &embeddenator_fs::SubEngram,
76 codec: CompressionCodec,
77 level: Option<i32>,
78 ) -> io::Result<Vec<u8>> {
79 let serialized = bincode::serialize(sub).map_err(io::Error::other)?;
80 let opts = BinaryWriteOptions { codec, level };
81 wrap_with_envelope(PayloadKind::SubEngramBincode, opts, &serialized)
82 }
83
84 pub fn unwrap_sub_engram(data: &[u8]) -> io::Result<embeddenator_fs::SubEngram> {
86 let decoded = unwrap_from_envelope(PayloadKind::SubEngramBincode, data)?;
87 bincode::deserialize(&decoded).map_err(io::Error::other)
88 }
89}
90
91pub struct FileAdapter;
93
94impl FileAdapter {
95 pub fn save_engram<P: AsRef<Path>>(
97 path: P,
98 engram: &Engram,
99 codec: CompressionCodec,
100 ) -> io::Result<()> {
101 let wrapped = EnvelopeAdapter::wrap_engram(engram, codec, None)?;
102 std::fs::write(path, wrapped)
103 }
104
105 pub fn load_engram<P: AsRef<Path>>(path: P) -> io::Result<Engram> {
107 let data = std::fs::read(path)?;
108 EnvelopeAdapter::unwrap_engram(&data)
109 }
110
111 pub fn save_manifest<P: AsRef<Path>>(path: P, manifest: &Manifest) -> io::Result<()> {
113 write_json_to_file(path, manifest)
114 }
115
116 pub fn load_manifest<P: AsRef<Path>>(path: P) -> io::Result<Manifest> {
118 read_json_from_file(path)
119 }
120
121 pub fn save_sparse_vec<P: AsRef<Path>>(path: P, vec: &SparseVec) -> io::Result<()> {
123 write_bincode_to_file(path, vec)
124 }
125
126 pub fn load_sparse_vec<P: AsRef<Path>>(path: P) -> io::Result<SparseVec> {
128 read_bincode_from_file(path)
129 }
130
131 pub fn save_vsa_config<P: AsRef<Path>>(
133 path: P,
134 config: &ReversibleVSAConfig,
135 ) -> io::Result<()> {
136 write_json_to_file(path, config)
137 }
138
139 pub fn load_vsa_config<P: AsRef<Path>>(path: P) -> io::Result<ReversibleVSAConfig> {
141 read_json_from_file(path)
142 }
143}
144
145pub struct StreamAdapter;
147
148impl StreamAdapter {
149 pub fn stream_encode<R: io::Read>(
151 mut reader: R,
152 config: &ReversibleVSAConfig,
153 chunk_size: usize,
154 ) -> io::Result<Vec<SparseVec>> {
155 let mut vectors = Vec::new();
156 let mut buffer = vec![0u8; chunk_size];
157
158 loop {
159 let n = reader.read(&mut buffer)?;
160 if n == 0 {
161 break;
162 }
163
164 let vec = SparseVec::encode_data(&buffer[..n], config, None);
165 vectors.push(vec);
166 }
167
168 Ok(vectors)
169 }
170
171 pub fn stream_decode<W: io::Write>(
173 vectors: &[SparseVec],
174 config: &ReversibleVSAConfig,
175 expected_size: usize,
176 mut writer: W,
177 ) -> io::Result<()> {
178 for vec in vectors {
179 let decoded = vec.decode_data(config, None, expected_size);
180 writer.write_all(&decoded)?;
181 }
182 Ok(())
183 }
184}
185
186pub struct BatchAdapter;
188
189impl BatchAdapter {
190 pub fn batch_encode(data_chunks: &[&[u8]], config: &ReversibleVSAConfig) -> Vec<SparseVec> {
192 data_chunks
193 .iter()
194 .map(|chunk| SparseVec::encode_data(chunk, config, None))
195 .collect()
196 }
197
198 pub fn batch_decode(
200 vectors: &[SparseVec],
201 config: &ReversibleVSAConfig,
202 expected_size: usize,
203 ) -> Vec<Vec<u8>> {
204 vectors
205 .iter()
206 .map(|vec| vec.decode_data(config, None, expected_size))
207 .collect()
208 }
209
210 pub fn batch_similarity(query: &SparseVec, vectors: &[SparseVec]) -> Vec<f64> {
212 vectors.iter().map(|vec| query.cosine(vec)).collect()
213 }
214
215 pub fn batch_bundle(vectors: &[SparseVec]) -> Option<SparseVec> {
217 if vectors.is_empty() {
218 return None;
219 }
220
221 let mut result = vectors[0].clone();
222 for vec in &vectors[1..] {
223 result = result.bundle(vec);
224 }
225 Some(result)
226 }
227}
228
229pub struct AutoFormatAdapter;
231
232impl AutoFormatAdapter {
233 pub fn auto_load_engram<P: AsRef<Path>>(path: P) -> io::Result<Engram> {
235 let data = std::fs::read(path)?;
236
237 if let Ok(engram) = EnvelopeAdapter::unwrap_engram(&data) {
239 return Ok(engram);
240 }
241
242 if let Ok(engram) = bincode::deserialize::<Engram>(&data) {
244 return Ok(engram);
245 }
246
247 if let Ok(json_str) = std::str::from_utf8(&data) {
249 if let Ok(engram) = serde_json::from_str::<Engram>(json_str) {
250 return Ok(engram);
251 }
252 }
253
254 Err(io::Error::other("unable to detect engram format"))
255 }
256
257 pub fn auto_load_manifest<P: AsRef<Path>>(path: P) -> io::Result<Manifest> {
259 let data = std::fs::read(path)?;
260
261 if let Ok(json_str) = std::str::from_utf8(&data) {
263 if let Ok(manifest) = serde_json::from_str::<Manifest>(json_str) {
264 return Ok(manifest);
265 }
266 }
267
268 if let Ok(manifest) = bincode::deserialize::<Manifest>(&data) {
270 return Ok(manifest);
271 }
272
273 Err(io::Error::other("unable to detect manifest format"))
274 }
275}
276
#[cfg(test)]
mod tests {
    use super::*;
    use embeddenator_fs::{CorrectionStore, SubEngram};
    use std::collections::HashMap;
    use tempfile::tempdir;

    /// Asserts that two sparse vectors hold identical `pos` and `neg` lists.
    fn assert_same_indices(expected: &SparseVec, actual: &SparseVec) {
        assert_eq!(expected.pos, actual.pos);
        assert_eq!(expected.neg, actual.neg);
    }

    // Uncompressed envelope round trip for an engram with an empty codebook.
    #[test]
    fn test_envelope_adapter_engram() {
        let root = SparseVec {
            pos: vec![1, 2, 3],
            neg: Vec::new(),
        };
        let engram = Engram {
            root,
            codebook: HashMap::new(),
            corrections: CorrectionStore::default(),
        };

        let bytes = EnvelopeAdapter::wrap_engram(&engram, CompressionCodec::None, None).unwrap();
        let restored = EnvelopeAdapter::unwrap_engram(&bytes).unwrap();

        assert_same_indices(&engram.root, &restored.root);
    }

    // Zstd round trip; also checks the envelope is smaller than plain bincode.
    #[test]
    #[cfg(feature = "compression-zstd")]
    fn test_envelope_adapter_zstd_compression() {
        let codebook: HashMap<_, _> = (0..100)
            .map(|i| {
                let entry = SparseVec {
                    pos: vec![i, i + 1, i + 2],
                    neg: vec![i + 3, i + 4],
                };
                (i, entry)
            })
            .collect();

        let engram = Engram {
            root: SparseVec {
                pos: (0..50).collect(),
                neg: (50..100).collect(),
            },
            codebook,
            corrections: CorrectionStore::default(),
        };

        let bytes =
            EnvelopeAdapter::wrap_engram(&engram, CompressionCodec::Zstd, Some(3)).unwrap();
        let restored = EnvelopeAdapter::unwrap_engram(&bytes).unwrap();

        assert_same_indices(&engram.root, &restored.root);
        assert_eq!(engram.codebook.len(), restored.codebook.len());

        let plain = bincode::serialize(&engram).unwrap();
        println!(
            "Uncompressed size: {}, Compressed size: {}",
            plain.len(),
            bytes.len()
        );
        assert!(bytes.len() < plain.len());
    }

    // Lz4 round trip; also checks the envelope is smaller than plain bincode.
    #[test]
    #[cfg(feature = "compression-lz4")]
    fn test_envelope_adapter_lz4_compression() {
        let codebook: HashMap<_, _> = (0..50)
            .map(|i| {
                let entry = SparseVec {
                    pos: vec![1, 2, 3, 4, 5],
                    neg: vec![6, 7, 8],
                };
                (i, entry)
            })
            .collect();

        let engram = Engram {
            root: SparseVec {
                pos: vec![1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
                neg: vec![6, 7, 8, 6, 7, 8],
            },
            codebook,
            corrections: CorrectionStore::default(),
        };

        let bytes = EnvelopeAdapter::wrap_engram(&engram, CompressionCodec::Lz4, None).unwrap();
        let restored = EnvelopeAdapter::unwrap_engram(&bytes).unwrap();

        assert_same_indices(&engram.root, &restored.root);
        assert_eq!(engram.codebook.len(), restored.codebook.len());

        let plain = bincode::serialize(&engram).unwrap();
        println!(
            "Uncompressed size: {}, Compressed size: {}",
            plain.len(),
            bytes.len()
        );
        assert!(bytes.len() < plain.len());
    }

    // Uncompressed envelope round trip for a sub-engram.
    #[test]
    fn test_envelope_adapter_sub_engram() {
        let sub = SubEngram {
            id: "test_sub".to_string(),
            root: SparseVec {
                pos: vec![10, 20, 30],
                neg: vec![40, 50],
            },
            chunk_ids: vec![1, 2, 3],
            chunk_count: 3,
            children: Vec::new(),
        };

        let bytes = EnvelopeAdapter::wrap_sub_engram(&sub, CompressionCodec::None, None).unwrap();
        let restored = EnvelopeAdapter::unwrap_sub_engram(&bytes).unwrap();

        assert_same_indices(&sub.root, &restored.root);
        assert_eq!(sub.id, restored.id);
    }

    // Engram saved to a temporary file and loaded back through FileAdapter.
    #[test]
    fn test_file_adapter_with_compression() {
        let workdir = tempdir().unwrap();
        let target = workdir.path().join("engram.bin");

        let engram = Engram {
            root: SparseVec {
                pos: vec![1, 2, 3],
                neg: vec![4, 5],
            },
            codebook: HashMap::new(),
            corrections: CorrectionStore::default(),
        };

        FileAdapter::save_engram(&target, &engram, CompressionCodec::None).unwrap();
        let restored = FileAdapter::load_engram(&target).unwrap();

        assert_same_indices(&engram.root, &restored.root);
    }

    // Round trip of the same engram through every codec variant.
    #[test]
    #[cfg(feature = "compression")]
    fn test_compression_round_trip() {
        let engram = Engram {
            root: SparseVec {
                pos: (0..100).collect(),
                neg: (100..200).collect(),
            },
            codebook: HashMap::new(),
            corrections: CorrectionStore::default(),
        };

        for codec in [
            CompressionCodec::None,
            CompressionCodec::Zstd,
            CompressionCodec::Lz4,
        ] {
            let bytes = EnvelopeAdapter::wrap_engram(&engram, codec, Some(3)).unwrap();
            let restored = EnvelopeAdapter::unwrap_engram(&bytes).unwrap();
            assert_eq!(
                engram.root.pos, restored.root.pos,
                "Failed for codec {:?}",
                codec
            );
            assert_eq!(
                engram.root.neg, restored.root.neg,
                "Failed for codec {:?}",
                codec
            );
        }
    }

    // Sparse vector (bincode) and VSA config (JSON) file round trips.
    #[test]
    fn test_file_adapter() {
        let workdir = tempdir().unwrap();

        let original_vec = SparseVec {
            pos: vec![1, 2, 3],
            neg: vec![4, 5],
        };
        let vec_path = workdir.path().join("vec.bin");
        FileAdapter::save_sparse_vec(&vec_path, &original_vec).unwrap();
        let restored_vec = FileAdapter::load_sparse_vec(&vec_path).unwrap();
        assert_same_indices(&original_vec, &restored_vec);

        let original_config = ReversibleVSAConfig::default();
        let config_path = workdir.path().join("config.json");
        FileAdapter::save_vsa_config(&config_path, &original_config).unwrap();
        let restored_config = FileAdapter::load_vsa_config(&config_path).unwrap();
        assert_eq!(original_config.block_size, restored_config.block_size);
        assert_eq!(
            original_config.max_path_depth,
            restored_config.max_path_depth
        );
    }

    // Output counts of the batch helpers for two five-byte inputs.
    #[test]
    fn test_batch_adapter() {
        let config = ReversibleVSAConfig::default();
        let data_chunks: [&[u8]; 2] = [b"hello", b"world"];

        let vectors = BatchAdapter::batch_encode(&data_chunks, &config);
        assert_eq!(vectors.len(), 2);

        let decoded = BatchAdapter::batch_decode(&vectors, &config, 5);
        assert_eq!(decoded.len(), 2);

        let query = SparseVec::new();
        let similarities = BatchAdapter::batch_similarity(&query, &vectors);
        assert_eq!(similarities.len(), 2);

        assert!(BatchAdapter::batch_bundle(&vectors).is_some());
    }

    // Streaming encode then decode; only checks that output is produced.
    #[test]
    fn test_stream_adapter() {
        let config = ReversibleVSAConfig::default();
        let input = b"hello world from streaming";

        let vectors = StreamAdapter::stream_encode(io::Cursor::new(input), &config, 8).unwrap();
        assert!(!vectors.is_empty());

        let mut output = Vec::new();
        StreamAdapter::stream_decode(&vectors, &config, 8, &mut output).unwrap();
        assert!(!output.is_empty());
    }

    // Manifest written as JSON and read back through the auto-detecting loader.
    #[test]
    fn test_auto_format_adapter() {
        let workdir = tempdir().unwrap();
        let target = workdir.path().join("manifest.json");

        let manifest = Manifest {
            files: Vec::new(),
            total_chunks: 0,
        };
        FileAdapter::save_manifest(&target, &manifest).unwrap();

        let restored = AutoFormatAdapter::auto_load_manifest(&target).unwrap();
        assert_eq!(manifest.total_chunks, restored.total_chunks);
    }
}