Skip to main content

embeddenator_interop/
adapters.rs

1//! Adapter layers for external library integration.
2//!
3//! This module provides adapters to integrate embeddenator with external
4//! libraries and tools, handling format conversions and API bridging.
5
6use embeddenator_fs::{Engram, Manifest};
7use embeddenator_io::{BinaryWriteOptions, CompressionCodec, PayloadKind};
8use embeddenator_vsa::{ReversibleVSAConfig, SparseVec};
9use std::io;
10use std::path::Path;
11
// Private helpers: JSON/bincode file I/O, plus thin wrappers over the embeddenator_io envelope API
13fn write_json_to_file<P: AsRef<Path>, T: serde::Serialize>(path: P, value: &T) -> io::Result<()> {
14    let json = serde_json::to_string_pretty(value).map_err(io::Error::other)?;
15    std::fs::write(path, json)
16}
17
18fn read_json_from_file<P: AsRef<Path>, T: serde::de::DeserializeOwned>(path: P) -> io::Result<T> {
19    let data = std::fs::read(path)?;
20    serde_json::from_slice(&data).map_err(io::Error::other)
21}
22
23fn write_bincode_to_file<P: AsRef<Path>, T: serde::Serialize>(
24    path: P,
25    value: &T,
26) -> io::Result<()> {
27    let bytes = bincode::serialize(value).map_err(io::Error::other)?;
28    std::fs::write(path, bytes)
29}
30
31fn read_bincode_from_file<P: AsRef<Path>, T: serde::de::DeserializeOwned>(
32    path: P,
33) -> io::Result<T> {
34    let data = std::fs::read(path)?;
35    bincode::deserialize(&data).map_err(io::Error::other)
36}
37
38fn wrap_with_envelope(
39    kind: PayloadKind,
40    opts: BinaryWriteOptions,
41    data: &[u8],
42) -> io::Result<Vec<u8>> {
43    // Use embeddenator-io's full envelope support with compression
44    embeddenator_io::wrap_or_legacy(kind, opts, data)
45}
46
47fn unwrap_from_envelope(kind: PayloadKind, data: &[u8]) -> io::Result<Vec<u8>> {
48    // Use embeddenator-io's full envelope unwrapping with decompression
49    embeddenator_io::unwrap_auto(kind, data)
50}
51
52/// Adapter for embeddenator-io envelope format
53pub struct EnvelopeAdapter;
54
55impl EnvelopeAdapter {
56    /// Wrap an Engram in envelope format with compression
57    pub fn wrap_engram(
58        engram: &Engram,
59        codec: CompressionCodec,
60        level: Option<i32>,
61    ) -> io::Result<Vec<u8>> {
62        let serialized = bincode::serialize(engram).map_err(io::Error::other)?;
63        let opts = BinaryWriteOptions { codec, level };
64        wrap_with_envelope(PayloadKind::EngramBincode, opts, &serialized)
65    }
66
67    /// Unwrap an Engram from envelope format
68    pub fn unwrap_engram(data: &[u8]) -> io::Result<Engram> {
69        let decoded = unwrap_from_envelope(PayloadKind::EngramBincode, data)?;
70        bincode::deserialize(&decoded).map_err(io::Error::other)
71    }
72
73    /// Wrap a SubEngram in envelope format with compression
74    pub fn wrap_sub_engram(
75        sub: &embeddenator_fs::SubEngram,
76        codec: CompressionCodec,
77        level: Option<i32>,
78    ) -> io::Result<Vec<u8>> {
79        let serialized = bincode::serialize(sub).map_err(io::Error::other)?;
80        let opts = BinaryWriteOptions { codec, level };
81        wrap_with_envelope(PayloadKind::SubEngramBincode, opts, &serialized)
82    }
83
84    /// Unwrap a SubEngram from envelope format
85    pub fn unwrap_sub_engram(data: &[u8]) -> io::Result<embeddenator_fs::SubEngram> {
86        let decoded = unwrap_from_envelope(PayloadKind::SubEngramBincode, data)?;
87        bincode::deserialize(&decoded).map_err(io::Error::other)
88    }
89}
90
91/// Adapter for file-based operations
92pub struct FileAdapter;
93
94impl FileAdapter {
95    /// Save Engram to file with envelope format
96    pub fn save_engram<P: AsRef<Path>>(
97        path: P,
98        engram: &Engram,
99        codec: CompressionCodec,
100    ) -> io::Result<()> {
101        let wrapped = EnvelopeAdapter::wrap_engram(engram, codec, None)?;
102        std::fs::write(path, wrapped)
103    }
104
105    /// Load Engram from file with envelope format
106    pub fn load_engram<P: AsRef<Path>>(path: P) -> io::Result<Engram> {
107        let data = std::fs::read(path)?;
108        EnvelopeAdapter::unwrap_engram(&data)
109    }
110
111    /// Save Manifest to JSON file
112    pub fn save_manifest<P: AsRef<Path>>(path: P, manifest: &Manifest) -> io::Result<()> {
113        write_json_to_file(path, manifest)
114    }
115
116    /// Load Manifest from JSON file
117    pub fn load_manifest<P: AsRef<Path>>(path: P) -> io::Result<Manifest> {
118        read_json_from_file(path)
119    }
120
121    /// Save SparseVec to bincode file
122    pub fn save_sparse_vec<P: AsRef<Path>>(path: P, vec: &SparseVec) -> io::Result<()> {
123        write_bincode_to_file(path, vec)
124    }
125
126    /// Load SparseVec from bincode file
127    pub fn load_sparse_vec<P: AsRef<Path>>(path: P) -> io::Result<SparseVec> {
128        read_bincode_from_file(path)
129    }
130
131    /// Save VSAConfig to JSON file
132    pub fn save_vsa_config<P: AsRef<Path>>(
133        path: P,
134        config: &ReversibleVSAConfig,
135    ) -> io::Result<()> {
136        write_json_to_file(path, config)
137    }
138
139    /// Load VSAConfig from JSON file
140    pub fn load_vsa_config<P: AsRef<Path>>(path: P) -> io::Result<ReversibleVSAConfig> {
141        read_json_from_file(path)
142    }
143}
144
145/// Adapter for streaming operations
146pub struct StreamAdapter;
147
148impl StreamAdapter {
149    /// Stream process data: encode in chunks
150    pub fn stream_encode<R: io::Read>(
151        mut reader: R,
152        config: &ReversibleVSAConfig,
153        chunk_size: usize,
154    ) -> io::Result<Vec<SparseVec>> {
155        let mut vectors = Vec::new();
156        let mut buffer = vec![0u8; chunk_size];
157
158        loop {
159            let n = reader.read(&mut buffer)?;
160            if n == 0 {
161                break;
162            }
163
164            let vec = SparseVec::encode_data(&buffer[..n], config, None);
165            vectors.push(vec);
166        }
167
168        Ok(vectors)
169    }
170
171    /// Stream process vectors: decode and write
172    pub fn stream_decode<W: io::Write>(
173        vectors: &[SparseVec],
174        config: &ReversibleVSAConfig,
175        expected_size: usize,
176        mut writer: W,
177    ) -> io::Result<()> {
178        for vec in vectors {
179            let decoded = vec.decode_data(config, None, expected_size);
180            writer.write_all(&decoded)?;
181        }
182        Ok(())
183    }
184}
185
186/// Adapter for batch operations
187pub struct BatchAdapter;
188
189impl BatchAdapter {
190    /// Batch encode multiple data chunks
191    pub fn batch_encode(data_chunks: &[&[u8]], config: &ReversibleVSAConfig) -> Vec<SparseVec> {
192        data_chunks
193            .iter()
194            .map(|chunk| SparseVec::encode_data(chunk, config, None))
195            .collect()
196    }
197
198    /// Batch decode multiple vectors
199    pub fn batch_decode(
200        vectors: &[SparseVec],
201        config: &ReversibleVSAConfig,
202        expected_size: usize,
203    ) -> Vec<Vec<u8>> {
204        vectors
205            .iter()
206            .map(|vec| vec.decode_data(config, None, expected_size))
207            .collect()
208    }
209
210    /// Batch compute similarities
211    pub fn batch_similarity(query: &SparseVec, vectors: &[SparseVec]) -> Vec<f64> {
212        vectors.iter().map(|vec| query.cosine(vec)).collect()
213    }
214
215    /// Batch bundle vectors
216    pub fn batch_bundle(vectors: &[SparseVec]) -> Option<SparseVec> {
217        if vectors.is_empty() {
218            return None;
219        }
220
221        let mut result = vectors[0].clone();
222        for vec in &vectors[1..] {
223            result = result.bundle(vec);
224        }
225        Some(result)
226    }
227}
228
229/// Adapter for format detection and auto-conversion
230pub struct AutoFormatAdapter;
231
232impl AutoFormatAdapter {
233    /// Try to load Engram with automatic format detection
234    pub fn auto_load_engram<P: AsRef<Path>>(path: P) -> io::Result<Engram> {
235        let data = std::fs::read(path)?;
236
237        // Try envelope format first
238        if let Ok(engram) = EnvelopeAdapter::unwrap_engram(&data) {
239            return Ok(engram);
240        }
241
242        // Try raw bincode
243        if let Ok(engram) = bincode::deserialize::<Engram>(&data) {
244            return Ok(engram);
245        }
246
247        // Try JSON
248        if let Ok(json_str) = std::str::from_utf8(&data) {
249            if let Ok(engram) = serde_json::from_str::<Engram>(json_str) {
250                return Ok(engram);
251            }
252        }
253
254        Err(io::Error::other("unable to detect engram format"))
255    }
256
257    /// Try to load Manifest with automatic format detection
258    pub fn auto_load_manifest<P: AsRef<Path>>(path: P) -> io::Result<Manifest> {
259        let data = std::fs::read(path)?;
260
261        // Try JSON first
262        if let Ok(json_str) = std::str::from_utf8(&data) {
263            if let Ok(manifest) = serde_json::from_str::<Manifest>(json_str) {
264                return Ok(manifest);
265            }
266        }
267
268        // Try bincode
269        if let Ok(manifest) = bincode::deserialize::<Manifest>(&data) {
270            return Ok(manifest);
271        }
272
273        Err(io::Error::other("unable to detect manifest format"))
274    }
275}
276
#[cfg(test)]
mod tests {
    use super::*;
    use embeddenator_fs::{CorrectionStore, SubEngram};
    use std::collections::HashMap;
    use tempfile::tempdir;

    /// Builds an engram around `root` with an empty codebook and default corrections.
    fn bare_engram(root: SparseVec) -> Engram {
        Engram {
            root,
            codebook: HashMap::new(),
            corrections: CorrectionStore::default(),
        }
    }

    /// Asserts that two vectors carry identical `pos` and `neg` index lists.
    fn assert_same_indices(expected: &SparseVec, actual: &SparseVec) {
        assert_eq!(expected.pos, actual.pos);
        assert_eq!(expected.neg, actual.neg);
    }

    #[test]
    fn test_envelope_adapter_engram() {
        // Minimal engram, round-tripped without compression.
        let engram = bare_engram(SparseVec {
            pos: vec![1, 2, 3],
            neg: vec![],
        });

        let wrapped = EnvelopeAdapter::wrap_engram(&engram, CompressionCodec::None, None).unwrap();
        let unwrapped = EnvelopeAdapter::unwrap_engram(&wrapped).unwrap();
        assert_same_indices(&engram.root, &unwrapped.root);
    }

    #[test]
    #[cfg(feature = "compression-zstd")]
    fn test_envelope_adapter_zstd_compression() {
        // A populated codebook gives the compressor something to work with.
        let codebook: HashMap<_, _> = (0..100)
            .map(|i| {
                let entry = SparseVec {
                    pos: vec![i, i + 1, i + 2],
                    neg: vec![i + 3, i + 4],
                };
                (i, entry)
            })
            .collect();

        let engram = Engram {
            codebook,
            ..bare_engram(SparseVec {
                pos: (0..50).collect(),
                neg: (50..100).collect(),
            })
        };

        let wrapped =
            EnvelopeAdapter::wrap_engram(&engram, CompressionCodec::Zstd, Some(3)).unwrap();
        let unwrapped = EnvelopeAdapter::unwrap_engram(&wrapped).unwrap();

        assert_same_indices(&engram.root, &unwrapped.root);
        assert_eq!(engram.codebook.len(), unwrapped.codebook.len());

        // The wrapped form must be smaller than the plain bincode encoding.
        let uncompressed = bincode::serialize(&engram).unwrap();
        println!(
            "Uncompressed size: {}, Compressed size: {}",
            uncompressed.len(),
            wrapped.len()
        );
        assert!(wrapped.len() < uncompressed.len());
    }

    #[test]
    #[cfg(feature = "compression-lz4")]
    fn test_envelope_adapter_lz4_compression() {
        // Identical codebook entries produce repeating patterns.
        let codebook: HashMap<_, _> = (0..50)
            .map(|i| {
                let entry = SparseVec {
                    pos: vec![1, 2, 3, 4, 5],
                    neg: vec![6, 7, 8],
                };
                (i, entry)
            })
            .collect();

        let engram = Engram {
            codebook,
            ..bare_engram(SparseVec {
                pos: vec![1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
                neg: vec![6, 7, 8, 6, 7, 8],
            })
        };

        let wrapped = EnvelopeAdapter::wrap_engram(&engram, CompressionCodec::Lz4, None).unwrap();
        let unwrapped = EnvelopeAdapter::unwrap_engram(&wrapped).unwrap();

        assert_same_indices(&engram.root, &unwrapped.root);
        assert_eq!(engram.codebook.len(), unwrapped.codebook.len());

        // The wrapped form must be smaller than the plain bincode encoding.
        let uncompressed = bincode::serialize(&engram).unwrap();
        println!(
            "Uncompressed size: {}, Compressed size: {}",
            uncompressed.len(),
            wrapped.len()
        );
        assert!(wrapped.len() < uncompressed.len());
    }

    #[test]
    fn test_envelope_adapter_sub_engram() {
        let sub = SubEngram {
            id: "test_sub".to_string(),
            root: SparseVec {
                pos: vec![10, 20, 30],
                neg: vec![40, 50],
            },
            chunk_ids: vec![1, 2, 3],
            chunk_count: 3,
            children: vec![],
        };

        // Round-trip without compression.
        let wrapped = EnvelopeAdapter::wrap_sub_engram(&sub, CompressionCodec::None, None).unwrap();
        let unwrapped = EnvelopeAdapter::unwrap_sub_engram(&wrapped).unwrap();
        assert_same_indices(&sub.root, &unwrapped.root);
        assert_eq!(sub.id, unwrapped.id);
    }

    #[test]
    fn test_file_adapter_with_compression() {
        let dir = tempdir().unwrap();
        let engram = bare_engram(SparseVec {
            pos: vec![1, 2, 3],
            neg: vec![4, 5],
        });

        // Save and reload through the filesystem with no compression.
        let path = dir.path().join("engram.bin");
        FileAdapter::save_engram(&path, &engram, CompressionCodec::None).unwrap();
        let loaded = FileAdapter::load_engram(&path).unwrap();
        assert_same_indices(&engram.root, &loaded.root);
    }

    #[test]
    #[cfg(feature = "compression")]
    fn test_compression_round_trip() {
        let engram = bare_engram(SparseVec {
            pos: (0..100).collect(),
            neg: (100..200).collect(),
        });

        // Every codec must round-trip the root vector unchanged.
        let codecs = [
            CompressionCodec::None,
            CompressionCodec::Zstd,
            CompressionCodec::Lz4,
        ];
        for codec in codecs {
            let wrapped = EnvelopeAdapter::wrap_engram(&engram, codec, Some(3)).unwrap();
            let unwrapped = EnvelopeAdapter::unwrap_engram(&wrapped).unwrap();
            assert_eq!(
                engram.root.pos, unwrapped.root.pos,
                "Failed for codec {:?}",
                codec
            );
            assert_eq!(
                engram.root.neg, unwrapped.root.neg,
                "Failed for codec {:?}",
                codec
            );
        }
    }

    #[test]
    fn test_file_adapter() {
        let dir = tempdir().unwrap();

        // Sparse vector round-trip through a bincode file.
        let original = SparseVec {
            pos: vec![1, 2, 3],
            neg: vec![4, 5],
        };
        let vec_path = dir.path().join("vec.bin");
        FileAdapter::save_sparse_vec(&vec_path, &original).unwrap();
        let reloaded = FileAdapter::load_sparse_vec(&vec_path).unwrap();
        assert_same_indices(&original, &reloaded);

        // Config round-trip through a JSON file.
        let config = ReversibleVSAConfig::default();
        let config_path = dir.path().join("config.json");
        FileAdapter::save_vsa_config(&config_path, &config).unwrap();
        let reloaded_config = FileAdapter::load_vsa_config(&config_path).unwrap();
        assert_eq!(config.block_size, reloaded_config.block_size);
        assert_eq!(config.max_path_depth, reloaded_config.max_path_depth);
    }

    #[test]
    fn test_batch_adapter() {
        let config = ReversibleVSAConfig::default();
        let data_chunks: [&[u8]; 2] = [b"hello", b"world"];

        // Encode: one vector per chunk.
        let vectors = BatchAdapter::batch_encode(&data_chunks, &config);
        assert_eq!(vectors.len(), 2);

        // Decode: one byte buffer per vector.
        let decoded = BatchAdapter::batch_decode(&vectors, &config, 5);
        assert_eq!(decoded.len(), 2);

        // Similarity: one score per vector.
        let query = SparseVec::new();
        let similarities = BatchAdapter::batch_similarity(&query, &vectors);
        assert_eq!(similarities.len(), 2);

        // Bundle: non-empty input yields a result.
        assert!(BatchAdapter::batch_bundle(&vectors).is_some());
    }

    #[test]
    fn test_stream_adapter() {
        let config = ReversibleVSAConfig::default();
        let source = io::Cursor::new(b"hello world from streaming");

        // Encode in 8-byte chunks.
        let vectors = StreamAdapter::stream_encode(source, &config, 8).unwrap();
        assert!(!vectors.is_empty());

        // Decode back into a byte sink.
        let mut sink = Vec::new();
        StreamAdapter::stream_decode(&vectors, &config, 8, &mut sink).unwrap();
        // Note: decoded data may differ from original due to chunking
        assert!(!sink.is_empty());
    }

    #[test]
    fn test_auto_format_adapter() {
        let dir = tempdir().unwrap();

        // Persist an empty manifest as JSON.
        let manifest = Manifest {
            files: Vec::new(),
            total_chunks: 0,
        };
        let path = dir.path().join("manifest.json");
        FileAdapter::save_manifest(&path, &manifest).unwrap();

        // Auto-detection should pick the JSON branch and load it back.
        let loaded = AutoFormatAdapter::auto_load_manifest(&path).unwrap();
        assert_eq!(manifest.total_chunks, loaded.total_chunks);
    }
}