Skip to main content

microscope_memory/
snapshot.rs

//! Snapshot: .mscope archive format for backup/restore/diff.
//!
//! Format (all integers are little-endian):
//!   [magic "MSEX" 4B][version u32][file_count u32]
//!   Per file: [name_len u16][name bytes][data_len u64][data bytes]
6
7use std::fs;
8use std::io::{BufReader, BufWriter, Read, Write};
9use std::path::Path;
10
/// Four-byte tag written at the very start of every archive and checked on read.
const MAGIC: &[u8; 4] = b"MSEX";
/// Format version written into the archive header by `export`.
/// `import` rejects archives whose header carries a larger value.
const VERSION: u32 = 1;

/// Files that compose a microscope index.
///
/// `export` looks for exactly these names inside the source directory, and
/// `import` only writes archive entries whose name appears in this list.
const INDEX_FILES: &[&str] = &[
    "meta.bin",
    "microscope.bin",
    "data.bin",
    "merkle.bin",
    "append.bin",
    "embeddings.bin",
];
23
24/// Export all index files from output_dir into a single .mscope archive.
25pub fn export(output_dir: &Path, archive_path: &Path) -> Result<(), String> {
26    let mut files: Vec<(String, Vec<u8>)> = Vec::new();
27
28    for &name in INDEX_FILES {
29        let path = output_dir.join(name);
30        if path.exists() {
31            let data = fs::read(&path).map_err(|e| format!("read {}: {}", name, e))?;
32            files.push((name.to_string(), data));
33        }
34    }
35
36    if files.is_empty() {
37        return Err("no index files found to export".to_string());
38    }
39
40    let f = fs::File::create(archive_path).map_err(|e| format!("create archive: {}", e))?;
41    let mut w = BufWriter::new(f);
42
43    // Header
44    w.write_all(MAGIC).map_err(|e| e.to_string())?;
45    w.write_all(&VERSION.to_le_bytes())
46        .map_err(|e| e.to_string())?;
47    w.write_all(&(files.len() as u32).to_le_bytes())
48        .map_err(|e| e.to_string())?;
49
50    // Files
51    let mut total_size = 12u64; // header
52    for (name, data) in &files {
53        let name_bytes = name.as_bytes();
54        w.write_all(&(name_bytes.len() as u16).to_le_bytes())
55            .map_err(|e| e.to_string())?;
56        w.write_all(name_bytes).map_err(|e| e.to_string())?;
57        w.write_all(&(data.len() as u64).to_le_bytes())
58            .map_err(|e| e.to_string())?;
59        w.write_all(data).map_err(|e| e.to_string())?;
60        total_size += 2 + name_bytes.len() as u64 + 8 + data.len() as u64;
61    }
62
63    w.flush().map_err(|e| e.to_string())?;
64
65    println!(
66        "  Exported {} files ({:.1} KB) → {}",
67        files.len(),
68        total_size as f64 / 1024.0,
69        archive_path.display()
70    );
71    for (name, data) in &files {
72        println!("    {}: {:.1} KB", name, data.len() as f64 / 1024.0);
73    }
74
75    Ok(())
76}
77
78/// Import a .mscope archive into output_dir.
79pub fn import(archive_path: &Path, output_dir: &Path) -> Result<(), String> {
80    let f = fs::File::open(archive_path).map_err(|e| format!("open archive: {}", e))?;
81    let mut r = BufReader::new(f);
82
83    // Header
84    let mut magic = [0u8; 4];
85    r.read_exact(&mut magic).map_err(|e| e.to_string())?;
86    if &magic != MAGIC {
87        return Err(format!("invalid magic: expected MSEX, got {:?}", magic));
88    }
89
90    let mut ver_buf = [0u8; 4];
91    r.read_exact(&mut ver_buf).map_err(|e| e.to_string())?;
92    let version = u32::from_le_bytes(ver_buf);
93    if version > VERSION {
94        return Err(format!(
95            "unsupported version: {} (max: {})",
96            version, VERSION
97        ));
98    }
99
100    let mut count_buf = [0u8; 4];
101    r.read_exact(&mut count_buf).map_err(|e| e.to_string())?;
102    let file_count = u32::from_le_bytes(count_buf);
103
104    fs::create_dir_all(output_dir).map_err(|e| format!("create output dir: {}", e))?;
105
106    for _ in 0..file_count {
107        // Name
108        let mut name_len_buf = [0u8; 2];
109        r.read_exact(&mut name_len_buf).map_err(|e| e.to_string())?;
110        let name_len = u16::from_le_bytes(name_len_buf) as usize;
111        let mut name_buf = vec![0u8; name_len];
112        r.read_exact(&mut name_buf).map_err(|e| e.to_string())?;
113        let name = String::from_utf8(name_buf).map_err(|e| e.to_string())?;
114
115        // Data
116        let mut data_len_buf = [0u8; 8];
117        r.read_exact(&mut data_len_buf).map_err(|e| e.to_string())?;
118        let data_len = u64::from_le_bytes(data_len_buf) as usize;
119        let mut data = vec![0u8; data_len];
120        r.read_exact(&mut data).map_err(|e| e.to_string())?;
121
122        // Sanitize filename (only allow known index files)
123        if !INDEX_FILES.contains(&name.as_str()) {
124            println!("    Skipping unknown file: {}", name);
125            continue;
126        }
127
128        let out_path = output_dir.join(&name);
129        fs::write(&out_path, &data).map_err(|e| format!("write {}: {}", name, e))?;
130        println!("    {}: {:.1} KB", name, data.len() as f64 / 1024.0);
131    }
132
133    println!("  Imported {} files → {}", file_count, output_dir.display());
134    Ok(())
135}
136
137/// Compare two .mscope archives: Merkle root + per-file size diff.
138pub fn diff(a_path: &Path, b_path: &Path) -> Result<(), String> {
139    let a_files = read_archive(a_path)?;
140    let b_files = read_archive(b_path)?;
141
142    println!("  {} vs {}", a_path.display(), b_path.display());
143
144    // Compare Merkle roots if both have meta.bin
145    let a_root = extract_merkle_root(&a_files);
146    let b_root = extract_merkle_root(&b_files);
147    match (a_root, b_root) {
148        (Some(ar), Some(br)) => {
149            if ar == br {
150                println!("  Merkle root: {} (identical)", hex_str(&ar));
151            } else {
152                println!("  Merkle root A: {}", hex_str(&ar));
153                println!("  Merkle root B: {}", hex_str(&br));
154                println!("  DIFF Merkle roots differ — data changed");
155            }
156        }
157        _ => println!("  (cannot compare Merkle roots — meta.bin missing)"),
158    }
159
160    // Per-file size comparison
161    let all_names: std::collections::BTreeSet<&str> = a_files
162        .keys()
163        .chain(b_files.keys())
164        .map(|s| s.as_str())
165        .collect();
166
167    for name in all_names {
168        let a_size = a_files.get(name).map(|d| d.len());
169        let b_size = b_files.get(name).map(|d| d.len());
170        match (a_size, b_size) {
171            (Some(a), Some(b)) => {
172                let delta = b as i64 - a as i64;
173                let sign = if delta >= 0 { "+" } else { "" };
174                let status = if a == b { "=" } else { "~" };
175                println!(
176                    "  {} {}: {} → {} ({}{} bytes)",
177                    status, name, a, b, sign, delta
178                );
179            }
180            (Some(a), None) => println!("  - {}: {} (removed)", name, a),
181            (None, Some(b)) => println!("  + {}: {} (added)", name, b),
182            (None, None) => {}
183        }
184    }
185
186    // Block count comparison
187    let a_blocks = extract_block_count(&a_files);
188    let b_blocks = extract_block_count(&b_files);
189    if let (Some(a), Some(b)) = (a_blocks, b_blocks) {
190        println!(
191            "  Blocks: {} → {} ({}{})",
192            a,
193            b,
194            if b >= a { "+" } else { "" },
195            b as i64 - a as i64
196        );
197    }
198
199    Ok(())
200}
201
202fn read_archive(path: &Path) -> Result<std::collections::HashMap<String, Vec<u8>>, String> {
203    let f = fs::File::open(path).map_err(|e| format!("open {}: {}", path.display(), e))?;
204    let mut r = BufReader::new(f);
205    let mut magic = [0u8; 4];
206    r.read_exact(&mut magic).map_err(|e| e.to_string())?;
207    if &magic != MAGIC {
208        return Err(format!("invalid magic in {}", path.display()));
209    }
210
211    let mut ver_buf = [0u8; 4];
212    r.read_exact(&mut ver_buf).map_err(|e| e.to_string())?;
213    let mut count_buf = [0u8; 4];
214    r.read_exact(&mut count_buf).map_err(|e| e.to_string())?;
215    let file_count = u32::from_le_bytes(count_buf);
216
217    let mut files = std::collections::HashMap::new();
218    for _ in 0..file_count {
219        let mut name_len_buf = [0u8; 2];
220        r.read_exact(&mut name_len_buf).map_err(|e| e.to_string())?;
221        let name_len = u16::from_le_bytes(name_len_buf) as usize;
222        let mut name_buf = vec![0u8; name_len];
223        r.read_exact(&mut name_buf).map_err(|e| e.to_string())?;
224        let name = String::from_utf8(name_buf).map_err(|e| e.to_string())?;
225
226        let mut data_len_buf = [0u8; 8];
227        r.read_exact(&mut data_len_buf).map_err(|e| e.to_string())?;
228        let data_len = u64::from_le_bytes(data_len_buf) as usize;
229        let mut data = vec![0u8; data_len];
230        r.read_exact(&mut data).map_err(|e| e.to_string())?;
231
232        files.insert(name, data);
233    }
234    Ok(files)
235}
236
237fn extract_merkle_root(files: &std::collections::HashMap<String, Vec<u8>>) -> Option<[u8; 32]> {
238    let meta = files.get("meta.bin")?;
239    if meta.len() < 4 || &meta[0..4] != b"MSC2" {
240        return None;
241    }
242    let offset = crate::META_HEADER_SIZE + 9 * crate::DEPTH_ENTRY_SIZE;
243    if meta.len() < offset + 32 {
244        return None;
245    }
246    let mut root = [0u8; 32];
247    root.copy_from_slice(&meta[offset..offset + 32]);
248    Some(root)
249}
250
/// Read the little-endian `u32` stored at bytes 8..12 of an archive's
/// `meta.bin`.
///
/// Returns `None` when there is no `meta.bin` entry or it is shorter than
/// 12 bytes. The leading tag of `meta.bin` is not inspected here.
// NOTE(review): callers treat this value as the block count; the field's
// position is assumed from the meta layout defined elsewhere — confirm.
fn extract_block_count(files: &std::collections::HashMap<String, Vec<u8>>) -> Option<u32> {
    files
        .get("meta.bin")
        .and_then(|meta| meta.get(8..12))
        .map(|raw| u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]))
}
258
/// Render `bytes` as a lowercase hexadecimal string, two digits per byte,
/// with no separators. An empty slice gives an empty string.
fn hex_str(bytes: &[u8]) -> String {
    let mut rendered = String::with_capacity(bytes.len() * 2);
    for byte in bytes {
        rendered.push_str(&format!("{:02x}", byte));
    }
    rendered
}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// Exports a small set of fake index files, imports the resulting archive
    /// into a fresh directory, and checks that the files come back unchanged.
    #[test]
    fn test_export_import_roundtrip() {
        // The system temp directory is shared, so key the scratch directory on
        // the process id to keep concurrent test runs from trampling each
        // other, and clear anything a previous aborted run left behind.
        let dir = std::env::temp_dir().join(format!("mscope_snap_test_{}", std::process::id()));
        let _ = fs::remove_dir_all(&dir);

        // Create fake index files (setup failures should fail the test loudly)
        let src_dir = dir.join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("meta.bin"), b"MSC2testdata1234").unwrap();
        fs::write(src_dir.join("microscope.bin"), b"headers_here").unwrap();
        fs::write(src_dir.join("data.bin"), b"block_data_here").unwrap();

        // Export
        let archive = dir.join("test.mscope");
        export(&src_dir, &archive).unwrap();
        assert!(archive.exists());

        // The archive starts with the magic tag, the version, and the number
        // of files that were present in the source directory.
        let raw = fs::read(&archive).unwrap();
        assert_eq!(&raw[0..4], &MAGIC[..]);
        assert_eq!(&raw[4..8], &VERSION.to_le_bytes()[..]);
        assert_eq!(&raw[8..12], &3u32.to_le_bytes()[..]);

        // Import
        let dst_dir = dir.join("dst");
        import(&archive, &dst_dir).unwrap();

        // Verify
        assert_eq!(
            fs::read(dst_dir.join("meta.bin")).unwrap(),
            b"MSC2testdata1234"
        );
        assert_eq!(
            fs::read(dst_dir.join("microscope.bin")).unwrap(),
            b"headers_here"
        );
        assert_eq!(
            fs::read(dst_dir.join("data.bin")).unwrap(),
            b"block_data_here"
        );
        // Files that were never exported must not appear on import.
        assert!(!dst_dir.join("merkle.bin").exists());

        let _ = fs::remove_dir_all(&dir);
    }
}
307}