Skip to main content

microscope_memory/
reader.rs

1//! MicroscopeReader — high-performance memory-mapped reader for the binary index.
2
3use colored::Colorize;
4use rayon::prelude::*;
5use std::fs;
6use std::io::Write;
7use std::path::Path;
8
9use crate::config::Config;
10use crate::{
11    auto_depth, content_coords_blended, layer_to_id, safe_truncate, BLOCK_DATA_SIZE,
12    DEPTH_ENTRY_SIZE, HEADER_SIZE, LAYER_NAMES, META_HEADER_SIZE,
13};
14
/// Block header: 32 bytes, packed, mmap-ready.
///
/// `MicroscopeReader::header` reinterprets raw bytes of the header file as this
/// struct, so the field order and widths below are the on-disk layout and must
/// not be reordered. `#[repr(C, packed)]` removes all padding, which is what
/// makes the fields add up to exactly 32 bytes.
#[repr(C, packed)]
#[derive(Clone, Copy)]
pub struct BlockHeader {
    /// X coordinate used by the spatial distance searches.
    pub x: f32,
    /// Y coordinate used by the spatial distance searches.
    pub y: f32,
    /// Z coordinate used by the spatial distance searches.
    pub z: f32,
    /// Fourth coordinate; the soft search compares it against `zoom / 8.0`.
    pub zoom: f32,
    /// Depth level of the block (printed as `D<depth>`, used to order text hits).
    pub depth: u8,
    /// Layer id: index into `LAYER_NAMES` and argument to `layer_color`.
    pub layer_id: u8,
    /// Byte offset of this block's text inside the data store.
    pub data_offset: u32,
    /// Length in bytes of this block's text.
    pub data_len: u16,
    /// Presumably the index of the parent block — not read in this file; confirm against the writer.
    pub parent_idx: u32,
    /// Presumably the number of child blocks — not read in this file; confirm against the writer.
    pub child_count: u16,
    /// Presumably a CRC-16 of the block — never verified in this file; confirm against the writer.
    pub crc16: [u8; 2],
}
31
// Meta header: 48 bytes at start of meta.bin
// NOTE(review): the four fields declared here occupy only 16 bytes when packed;
// presumably the rest of the 48 bytes is reserved space — confirm against the
// writer and META_HEADER_SIZE.
// `MicroscopeReader::open` parses meta.bin by byte offset (magic at 0..4,
// block_count at 8..12) and this file never constructs the struct, which is
// why `dead_code` is allowed.
#[repr(C, packed)]
#[derive(Clone, Copy)]
#[allow(dead_code)]
pub struct MetaHeader {
    /// File magic; `open` accepts `MSCM`, `MSC2` and `MSC3`.
    pub magic: [u8; 4],
    /// Format version — not inspected in this file.
    pub version: u32,
    /// Number of block headers; read by `open` from bytes 8..12.
    pub block_count: u32,
    /// Presumably the number of depth entries that follow — not read in this file.
    pub depth_count: u32,
}
42
/// Display colour name for a layer id.
///
/// Ids 0 through 9 have dedicated names; every other id falls back to `"white"`.
pub fn layer_color(id: u8) -> &'static str {
    // Indexed by layer id.
    const PALETTE: [&str; 10] = [
        "white", "blue", "cyan", "green", "red", "yellow", "magenta", "orange", "lime", "purple",
    ];
    PALETTE.get(usize::from(id)).copied().unwrap_or("white")
}
58
59#[cfg(target_arch = "x86_64")]
60use std::arch::x86_64::*;
61
62#[inline(always)]
63fn l2_dist_sq_simd(h: &BlockHeader, x: f32, y: f32, z: f32, qz: f32, zw: f32) -> f32 {
64    #[cfg(target_arch = "x86_64")]
65    unsafe {
66        let h_vals = _mm_loadu_ps(h as *const BlockHeader as *const f32);
67        let q_vals = _mm_set_ps(qz, z, y, x);
68        let diff = _mm_sub_ps(h_vals, q_vals);
69        let weights = _mm_set_ps(zw, 1.0, 1.0, 1.0);
70        let weighted_diff = _mm_mul_ps(diff, weights);
71        let sq = _mm_mul_ps(weighted_diff, weighted_diff);
72        let res = _mm_hadd_ps(sq, sq);
73        let res2 = _mm_hadd_ps(res, res);
74        let mut dist = 0.0f32;
75        _mm_store_ss(&mut dist, res2);
76        dist
77    }
78    #[cfg(not(target_arch = "x86_64"))]
79    {
80        let dx = h.x - x;
81        let dy = h.y - y;
82        let dz = h.z - z;
83        let dw = (h.zoom - qz) * zw;
84        dx * dx + dy * dy + dz * dz + dw * dw
85    }
86}
87
/// Backing store for block data — either memory-mapped or decompressed in-memory.
///
/// Both variants dereference to `[u8]`, so callers slice block text out of a
/// `DataStore` without caring which variant backs it.
pub enum DataStore {
    /// Normal mmap path (uncompressed data.bin)
    Mmap(memmap2::Mmap),
    /// Decompressed data held in memory (from data.bin.zst).
    /// Only exists when the `compression` feature is enabled.
    #[cfg(feature = "compression")]
    InMemory(Vec<u8>),
}
96
97impl std::ops::Deref for DataStore {
98    type Target = [u8];
99    fn deref(&self) -> &[u8] {
100        match self {
101            DataStore::Mmap(m) => m,
102            #[cfg(feature = "compression")]
103            DataStore::InMemory(v) => v,
104        }
105    }
106}
107
/// High-performance memory-mapped reader for the Microscope index.
///
/// Built by `MicroscopeReader::open` from the files `meta.bin`,
/// `microscope.bin` and `data.bin` (or `data.bin.zst`) in the configured
/// output directory.
pub struct MicroscopeReader {
    /// Memory map of `microscope.bin`: consecutive block headers, `HEADER_SIZE` bytes apart.
    pub headers: memmap2::Mmap,
    /// Block text bytes, addressed by each header's `data_offset` / `data_len`.
    pub data: DataStore,
    /// Number of blocks, as recorded in `meta.bin`.
    pub block_count: usize,
    /// Per-depth `(start, count)` ranges of block indices for depths 0 through 8.
    pub depth_ranges: [(u32, u32); 9],
}
115
116impl MicroscopeReader {
117    pub fn open(config: &Config) -> Result<Self, String> {
118        let output_dir = Path::new(&config.paths.output_dir);
119        let meta_path = output_dir.join("meta.bin");
120        let hdr_path = output_dir.join("microscope.bin");
121        let dat_path = output_dir.join("data.bin");
122
123        let meta = fs::read(&meta_path)
124            .map_err(|e| format!("open meta.bin — run 'build' first: {}", e))?;
125        if meta.len() < 12 {
126            return Err("meta.bin too small".to_string());
127        }
128        let magic = &meta[0..4];
129        if magic != b"MSCM" && magic != b"MSC2" && magic != b"MSC3" {
130            return Err("invalid magic: expected MSCM, MSC2 or MSC3".to_string());
131        }
132        let block_count = u32::from_le_bytes(
133            meta[8..12]
134                .try_into()
135                .map_err(|_| "meta.bin: bad block_count bytes")?,
136        ) as usize;
137        let mut depth_ranges = [(0u32, 0u32); 9];
138        for (d, range) in depth_ranges.iter_mut().enumerate() {
139            let off = META_HEADER_SIZE + d * DEPTH_ENTRY_SIZE;
140            if off + 8 > meta.len() {
141                return Err(format!("meta.bin truncated at depth {}", d));
142            }
143            let start = u32::from_le_bytes(
144                meta[off..off + 4]
145                    .try_into()
146                    .map_err(|_| "meta.bin: bad depth range bytes")?,
147            );
148            let count = u32::from_le_bytes(
149                meta[off + 4..off + 8]
150                    .try_into()
151                    .map_err(|_| "meta.bin: bad depth range bytes")?,
152            );
153            *range = (start, count);
154        }
155
156        let hdr_file =
157            fs::File::open(&hdr_path).map_err(|e| format!("open microscope.bin: {}", e))?;
158        // Safety: microscope.bin is read-only and will remain valid for the lifetime of MicroscopeReader
159        let headers =
160            unsafe { memmap2::Mmap::map(&hdr_file).map_err(|e| format!("mmap headers: {}", e))? };
161
162        #[cfg(feature = "compression")]
163        let data = {
164            let zst_path = output_dir.join("data.bin.zst");
165            if zst_path.exists()
166                && (!dat_path.exists()
167                    || fs::metadata(&zst_path)
168                        .and_then(|zm| {
169                            fs::metadata(&dat_path).map(|dm| {
170                                zm.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH)
171                                    > dm.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH)
172                            })
173                        })
174                        .unwrap_or(false))
175            {
176                let compressed =
177                    fs::read(&zst_path).map_err(|e| format!("read data.bin.zst: {}", e))?;
178                let decompressed = zstd::decode_all(std::io::Cursor::new(&compressed))
179                    .map_err(|e| format!("zstd decompress: {}", e))?;
180                DataStore::InMemory(decompressed)
181            } else {
182                let dat_file =
183                    fs::File::open(&dat_path).map_err(|e| format!("open data.bin: {}", e))?;
184                // Safety: data.bin is read-only and will remain valid for the lifetime of MicroscopeReader
185                DataStore::Mmap(unsafe {
186                    memmap2::Mmap::map(&dat_file).map_err(|e| format!("mmap data.bin: {}", e))?
187                })
188            }
189        };
190
191        #[cfg(not(feature = "compression"))]
192        let data = {
193            let dat_file =
194                fs::File::open(&dat_path).map_err(|e| format!("open data.bin: {}", e))?;
195            // Safety: data.bin is read-only and will remain valid for the lifetime of MicroscopeReader
196            DataStore::Mmap(unsafe {
197                memmap2::Mmap::map(&dat_file).map_err(|e| format!("mmap data.bin: {}", e))?
198            })
199        };
200
201        Ok(MicroscopeReader {
202            headers,
203            data,
204            block_count,
205            depth_ranges,
206        })
207    }
208
209    #[inline(always)]
210    pub fn header(&self, i: usize) -> &BlockHeader {
211        debug_assert!(i < self.block_count);
212        unsafe { &*(self.headers.as_ptr().add(i * HEADER_SIZE) as *const BlockHeader) }
213    }
214
215    #[inline(always)]
216    pub fn text(&self, i: usize) -> &str {
217        let h = self.header(i);
218        let start = h.data_offset as usize;
219        let end = start + h.data_len as usize;
220        std::str::from_utf8(&self.data[start..end]).unwrap_or("<bin>")
221    }
222
223    /// The MICROSCOPE: exact depth + spatial L2 search.
224    pub fn look(
225        &self,
226        config: &Config,
227        x: f32,
228        y: f32,
229        z: f32,
230        zoom: u8,
231        k: usize,
232    ) -> Vec<(f32, usize, bool)> {
233        let (start, count) = self.depth_ranges[zoom as usize];
234        let (start, count) = (start as usize, count as usize);
235
236        let mut results: Vec<(f32, usize, bool)> = Vec::with_capacity(count + 10);
237        if count > 0 {
238            for i in start..(start + count) {
239                let h = self.header(i);
240                let dx = h.x - x;
241                let dy = h.y - y;
242                let dz = h.z - z;
243                results.push((dx * dx + dy * dy + dz * dz, i, true));
244            }
245        }
246
247        let append_path = Path::new(&config.paths.output_dir).join("append.bin");
248        let appended = read_append_log(&append_path);
249        for (ai, entry) in appended.iter().enumerate() {
250            if entry.depth != zoom {
251                continue;
252            }
253            let dx = entry.x - x;
254            let dy = entry.y - y;
255            let dz = entry.z - z;
256            results.push((dx * dx + dy * dy + dz * dz, ai + 1_000_000, false));
257        }
258
259        let k = k.min(results.len());
260        if k == 0 {
261            return vec![];
262        }
263        results.select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
264        results.truncate(k);
265        results.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
266        results
267    }
268
269    /// 4D soft zoom search with SIMD.
270    #[allow(clippy::too_many_arguments)]
271    pub fn look_soft(
272        &self,
273        config: &Config,
274        x: f32,
275        y: f32,
276        z: f32,
277        zoom: u8,
278        k: usize,
279        zw: f32,
280    ) -> Vec<(f32, usize, bool)> {
281        let qz = zoom as f32 / 8.0;
282        let mut results: Vec<(f32, usize, bool)> = (0..self.block_count)
283            .into_par_iter()
284            .map(|i| {
285                let h = self.header(i);
286                (l2_dist_sq_simd(h, x, y, z, qz, zw), i, true)
287            })
288            .collect();
289
290        let append_path = Path::new(&config.paths.output_dir).join("append.bin");
291        let appended = read_append_log(&append_path);
292        for (ai, entry) in appended.iter().enumerate() {
293            let dx = entry.x - x;
294            let dy = entry.y - y;
295            let dz = entry.z - z;
296            let entry_zoom = entry.depth as f32 / 8.0;
297            let dw = (entry_zoom - qz) * zw;
298            results.push((dx * dx + dy * dy + dz * dz + dw * dw, ai + 1_000_000, false));
299        }
300
301        let k = k.min(results.len());
302        if k == 0 {
303            return vec![];
304        }
305        results.select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
306        results.truncate(k);
307        results.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
308        results
309    }
310
311    /// Radial search: find all blocks within `radius` of (x, y, z) at a specific depth.
312    /// Returns a ResultSet with the closest match as primary and neighbors distance-weighted.
313    #[allow(clippy::too_many_arguments)]
314    pub fn radial_search(
315        &self,
316        config: &Config,
317        x: f32,
318        y: f32,
319        z: f32,
320        depth: u8,
321        radius: f32,
322        k: usize,
323    ) -> ResultSet {
324        let radius_sq = radius * radius;
325        let (start, count) = self.depth_ranges[depth as usize];
326        let (start, count) = (start as usize, count as usize);
327
328        // SIMD-accelerated radial scan within depth band
329        let mut candidates: Vec<(f32, usize, bool)> = if count > 0 {
330            (start..(start + count))
331                .into_par_iter()
332                .filter_map(|i| {
333                    let h = self.header(i);
334                    let qz = depth as f32 / 8.0;
335                    let dist_sq = l2_dist_sq_simd(h, x, y, z, qz, 0.0); // no zoom weight for radial
336                    if dist_sq <= radius_sq {
337                        Some((dist_sq, i, true))
338                    } else {
339                        None
340                    }
341                })
342                .collect()
343        } else {
344            Vec::new()
345        };
346
347        // Include append log entries at the same depth
348        let append_path = Path::new(&config.paths.output_dir).join("append.bin");
349        let appended = read_append_log(&append_path);
350        for (ai, entry) in appended.iter().enumerate() {
351            if entry.depth != depth {
352                continue;
353            }
354            let dx = entry.x - x;
355            let dy = entry.y - y;
356            let dz = entry.z - z;
357            let dist_sq = dx * dx + dy * dy + dz * dz;
358            if dist_sq <= radius_sq {
359                candidates.push((dist_sq, ai + 1_000_000, false));
360            }
361        }
362
363        candidates.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
364
365        // Build ResultSet
366        let primary = candidates
367            .first()
368            .map(|&(dist, idx, is_main)| RadialResult {
369                block_idx: idx,
370                dist_sq: dist,
371                weight: 1.0,
372                is_main,
373            });
374
375        let neighbors: Vec<RadialResult> = candidates
376            .iter()
377            .skip(1)
378            .take(k.saturating_sub(1))
379            .map(|&(dist_sq, idx, is_main)| {
380                // Weight: inverse distance, normalized so closest neighbor = 1.0
381                let weight = if dist_sq > 0.0001 {
382                    (radius_sq - dist_sq) / radius_sq
383                } else {
384                    1.0
385                };
386                RadialResult {
387                    block_idx: idx,
388                    dist_sq,
389                    weight,
390                    is_main,
391                }
392            })
393            .collect();
394
395        let total_within_radius = candidates.len();
396
397        ResultSet {
398            primary,
399            neighbors,
400            center: (x, y, z),
401            depth,
402            radius,
403            total_within_radius,
404        }
405    }
406
407    /// Text search
408    pub fn find_text(&self, query: &str, k: usize) -> Vec<(u8, usize)> {
409        let q = query.to_lowercase();
410        let mut results: Vec<(u8, usize)> = (0..self.block_count)
411            .into_par_iter()
412            .filter_map(|i| {
413                if self.text(i).to_lowercase().contains(&q) {
414                    Some((self.header(i).depth, i))
415                } else {
416                    None
417                }
418            })
419            .collect();
420
421        results.sort_by_key(|&(d, _)| d);
422        results.truncate(k);
423        results
424    }
425
426    pub fn print_result(&self, i: usize, dist: f32) {
427        let h = self.header(i);
428        let text = self.text(i);
429        let layer = LAYER_NAMES.get(h.layer_id as usize).unwrap_or(&"?");
430        let preview: String = text.chars().take(70).filter(|&c| c != '\n').collect();
431        println!(
432            "  {} {} {} {}",
433            format!("D{}", h.depth).cyan(),
434            format!("L2={:.5}", dist).yellow(),
435            format!("[{}/{}]", layer, layer_color(h.layer_id)).green(),
436            preview
437        );
438    }
439}
440
441// ─── APPEND LOG ──────────────────────────────────────
442
/// One record decoded from the append log (`append.bin`).
#[allow(dead_code)]
pub struct AppendEntry {
    /// Record text, decoded lossily from the stored bytes.
    pub text: String,
    /// Layer id: index into `LAYER_NAMES`.
    pub layer_id: u8,
    /// Importance byte as stored; not interpreted in this file.
    pub importance: u8,
    /// Depth level; records from logs without the `APv2` magic get depth 4.
    pub depth: u8,
    /// X coordinate used by the distance searches.
    pub x: f32,
    /// Y coordinate used by the distance searches.
    pub y: f32,
    /// Z coordinate used by the distance searches.
    pub z: f32,
}
453
454pub fn read_append_log(path: &Path) -> Vec<AppendEntry> {
455    if !path.exists() {
456        return vec![];
457    }
458    let data = fs::read(path).unwrap_or_default();
459    if data.is_empty() {
460        return vec![];
461    }
462
463    let mut entries = Vec::new();
464    let mut pos = 0;
465
466    let is_v2 = data.len() >= 4 && &data[0..4] == b"APv2";
467    if is_v2 {
468        pos = 4;
469    }
470
471    let header_size = if is_v2 { 19 } else { 18 };
472
473    while pos + header_size <= data.len() {
474        let len = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
475        let lid = data[pos + 4];
476        let imp = data[pos + 5];
477
478        let (depth, coords_start) = if is_v2 {
479            (data[pos + 6], pos + 7)
480        } else {
481            (4u8, pos + 6)
482        };
483
484        let x = f32::from_le_bytes(data[coords_start..coords_start + 4].try_into().unwrap());
485        let y = f32::from_le_bytes(data[coords_start + 4..coords_start + 8].try_into().unwrap());
486        let z = f32::from_le_bytes(
487            data[coords_start + 8..coords_start + 12]
488                .try_into()
489                .unwrap(),
490        );
491        pos += header_size;
492        if pos + len > data.len() {
493            break;
494        }
495        let text = String::from_utf8_lossy(&data[pos..pos + len]).to_string();
496        pos += len;
497        entries.push(AppendEntry {
498            text,
499            layer_id: lid,
500            importance: imp,
501            depth,
502            x,
503            y,
504            z,
505        });
506    }
507    entries
508}
509
510/// Display a single append-log result entry.
511pub fn print_append_result(appended: &[AppendEntry], idx: usize, dist: f32) {
512    let ai = idx - 1_000_000;
513    if ai < appended.len() {
514        let e = &appended[ai];
515        let layer = LAYER_NAMES.get(e.layer_id as usize).unwrap_or(&"?");
516        println!(
517            "  {} {} {} {}",
518            format!("D{}", e.depth).cyan(),
519            format!("L2={:.5}", dist).yellow(),
520            format!("[{}/new]", layer).green(),
521            safe_truncate(&e.text, 70)
522        );
523    }
524}
525
526// ─── RADIAL SEARCH TYPES ─────────────────────────────
527
/// A single result from radial search.
#[derive(Debug, Clone)]
pub struct RadialResult {
    /// Block index when `is_main` is true; otherwise the append-log position plus 1_000_000.
    pub block_idx: usize,
    /// Squared distance from the search centre.
    pub dist_sq: f32,
    pub weight: f32, // 1.0 = primary, decays with distance for neighbors
    /// True for hits from the main index, false for append-log hits.
    pub is_main: bool,
}
536
/// ResultSet from radial search: primary hit + distance-weighted neighbors.
#[derive(Debug)]
pub struct ResultSet {
    /// Closest hit within the radius, or `None` when nothing matched.
    pub primary: Option<RadialResult>,
    /// Further hits in ascending distance order, excluding the primary.
    pub neighbors: Vec<RadialResult>,
    /// The `(x, y, z)` point the search was centred on.
    pub center: (f32, f32, f32),
    /// Depth level that was searched.
    pub depth: u8,
    /// Search radius (not squared).
    pub radius: f32,
    /// Number of hits inside the radius, including ones not returned in `neighbors`.
    pub total_within_radius: usize,
}
547
548impl ResultSet {
549    /// All results (primary + neighbors) as a flat list.
550    pub fn all(&self) -> Vec<&RadialResult> {
551        let mut v = Vec::with_capacity(1 + self.neighbors.len());
552        if let Some(ref p) = self.primary {
553            v.push(p);
554        }
555        v.extend(self.neighbors.iter());
556        v
557    }
558
559    /// Block indices of all results (for Hebbian co-activation).
560    pub fn block_indices(&self) -> Vec<(u32, f32)> {
561        self.all()
562            .iter()
563            .map(|r| (r.block_idx as u32, r.weight))
564            .collect()
565    }
566}
567
568pub fn store_memory(
569    config: &Config,
570    text: &str,
571    layer: &str,
572    importance: u8,
573) -> Result<(), String> {
574    let t0 = std::time::Instant::now();
575    let (x, y, z) = content_coords_blended(text, layer, config.search.semantic_weight);
576    let lid = layer_to_id(layer);
577    let depth = auto_depth(text);
578
579    let append_path = Path::new(&config.paths.output_dir).join("append.bin");
580
581    let needs_magic = !append_path.exists()
582        || fs::metadata(&append_path)
583            .map(|m| m.len() == 0)
584            .unwrap_or(true);
585
586    let mut file = fs::OpenOptions::new()
587        .create(true)
588        .append(true)
589        .open(&append_path)
590        .map_err(|e| format!("open append log: {}", e))?;
591
592    let write = |f: &mut fs::File, data: &[u8]| -> Result<(), String> {
593        f.write_all(data)
594            .map_err(|e| format!("write append log: {}", e))
595    };
596
597    if needs_magic {
598        write(&mut file, b"APv2")?;
599    }
600
601    let text_bytes = text.as_bytes();
602    let len = text_bytes.len().min(BLOCK_DATA_SIZE);
603
604    write(&mut file, &(len as u32).to_le_bytes())?;
605    write(&mut file, &[lid])?;
606    write(&mut file, &[importance])?;
607    write(&mut file, &[depth])?;
608    write(&mut file, &x.to_le_bytes())?;
609    write(&mut file, &y.to_le_bytes())?;
610    write(&mut file, &z.to_le_bytes())?;
611    write(&mut file, &text_bytes[..len])?;
612
613    let elapsed = t0.elapsed();
614    println!(
615        "  {} D{} [{}/{}] ({:.3},{:.3},{:.3}) {}",
616        "STORED".green().bold(),
617        depth,
618        layer,
619        layer_color(lid),
620        x,
621        y,
622        z,
623        safe_truncate(text, 60)
624    );
625    println!("  {} ns", elapsed.as_nanos());
626    Ok(())
627}