1use colored::Colorize;
4use rayon::prelude::*;
5use std::fs;
6use std::io::Write;
7use std::path::Path;
8
9use crate::config::Config;
10use crate::{
11 auto_depth, content_coords_blended, layer_to_id, safe_truncate, BLOCK_DATA_SIZE,
12 DEPTH_ENTRY_SIZE, HEADER_SIZE, LAYER_NAMES, META_HEADER_SIZE,
13};
14
/// Fixed-size on-disk record describing one block of the main index.
///
/// The struct is `#[repr(C, packed)]`: fields are stored back to back with no
/// padding (32 bytes in total) and the type has alignment 1, so it can be
/// overlaid on any byte offset of the memory-mapped header file.
///
/// Because the struct is packed, read fields by value (`h.x`); taking a
/// reference to a multi-byte field is rejected by the compiler.
#[repr(C, packed)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct BlockHeader {
    /// X coordinate used by the distance searches.
    pub x: f32,
    /// Y coordinate used by the distance searches.
    pub y: f32,
    /// Z coordinate used by the distance searches.
    pub z: f32,
    /// Zoom coordinate; the soft search compares it against `zoom / 8.0`.
    pub zoom: f32,
    /// Depth level of the block.
    pub depth: u8,
    /// Layer identifier (index into the layer name table).
    pub layer_id: u8,
    /// Byte offset of the block's text inside the data store.
    pub data_offset: u32,
    /// Length in bytes of the block's text.
    pub data_len: u16,
    /// Index of the parent block (not interpreted in this module).
    pub parent_idx: u32,
    /// Number of child blocks (not interpreted in this module).
    pub child_count: u16,
    /// Stored checksum bytes (not verified in this module).
    pub crc16: [u8; 2],
}
31
/// Leading fixed-size part of `meta.bin` (16 bytes, packed, no padding).
///
/// `MicroscopeReader::open` parses the file by byte offset rather than through
/// this struct (magic at bytes 0..4, block count at bytes 8..12), which matches
/// the field order declared here.
#[repr(C, packed)]
#[derive(Clone, Copy, Debug, PartialEq)]
// Kept as a description of the file layout even though this module reads raw bytes.
#[allow(dead_code)]
pub struct MetaHeader {
    /// File signature; the reader accepts `MSCM`, `MSC2` and `MSC3`.
    pub magic: [u8; 4],
    /// Format version (not inspected by the reader in this module).
    pub version: u32,
    /// Number of block headers in the index.
    pub block_count: u32,
    /// Number of depth entries (not inspected by the reader in this module).
    pub depth_count: u32,
}
42
/// Returns the display colour name associated with a layer id.
///
/// Ids `0..=9` map to a fixed palette; every other id falls back to `"white"`.
pub fn layer_color(id: u8) -> &'static str {
    // Palette indexed by layer id.
    const PALETTE: [&str; 10] = [
        "white", "blue", "cyan", "green", "red", "yellow", "magenta", "orange", "lime", "purple",
    ];
    PALETTE.get(usize::from(id)).copied().unwrap_or("white")
}
58
59#[cfg(target_arch = "x86_64")]
60use std::arch::x86_64::*;
61
62#[inline(always)]
63fn l2_dist_sq_simd(h: &BlockHeader, x: f32, y: f32, z: f32, qz: f32, zw: f32) -> f32 {
64 #[cfg(target_arch = "x86_64")]
65 unsafe {
66 let h_vals = _mm_loadu_ps(h as *const BlockHeader as *const f32);
67 let q_vals = _mm_set_ps(qz, z, y, x);
68 let diff = _mm_sub_ps(h_vals, q_vals);
69 let weights = _mm_set_ps(zw, 1.0, 1.0, 1.0);
70 let weighted_diff = _mm_mul_ps(diff, weights);
71 let sq = _mm_mul_ps(weighted_diff, weighted_diff);
72 let res = _mm_hadd_ps(sq, sq);
73 let res2 = _mm_hadd_ps(res, res);
74 let mut dist = 0.0f32;
75 _mm_store_ss(&mut dist, res2);
76 dist
77 }
78 #[cfg(not(target_arch = "x86_64"))]
79 {
80 let dx = h.x - x;
81 let dy = h.y - y;
82 let dz = h.z - z;
83 let dw = (h.zoom - qz) * zw;
84 dx * dx + dy * dy + dz * dz + dw * dw
85 }
86}
87
88pub enum DataStore {
90 Mmap(memmap2::Mmap),
92 #[cfg(feature = "compression")]
94 InMemory(Vec<u8>),
95}
96
97impl std::ops::Deref for DataStore {
98 type Target = [u8];
99 fn deref(&self) -> &[u8] {
100 match self {
101 DataStore::Mmap(m) => m,
102 #[cfg(feature = "compression")]
103 DataStore::InMemory(v) => v,
104 }
105 }
106}
107
108pub struct MicroscopeReader {
110 pub headers: memmap2::Mmap,
111 pub data: DataStore,
112 pub block_count: usize,
113 pub depth_ranges: [(u32, u32); 9],
114}
115
116impl MicroscopeReader {
117 pub fn open(config: &Config) -> Result<Self, String> {
118 let output_dir = Path::new(&config.paths.output_dir);
119 let meta_path = output_dir.join("meta.bin");
120 let hdr_path = output_dir.join("microscope.bin");
121 let dat_path = output_dir.join("data.bin");
122
123 let meta = fs::read(&meta_path)
124 .map_err(|e| format!("open meta.bin — run 'build' first: {}", e))?;
125 if meta.len() < 12 {
126 return Err("meta.bin too small".to_string());
127 }
128 let magic = &meta[0..4];
129 if magic != b"MSCM" && magic != b"MSC2" && magic != b"MSC3" {
130 return Err("invalid magic: expected MSCM, MSC2 or MSC3".to_string());
131 }
132 let block_count = u32::from_le_bytes(
133 meta[8..12]
134 .try_into()
135 .map_err(|_| "meta.bin: bad block_count bytes")?,
136 ) as usize;
137 let mut depth_ranges = [(0u32, 0u32); 9];
138 for (d, range) in depth_ranges.iter_mut().enumerate() {
139 let off = META_HEADER_SIZE + d * DEPTH_ENTRY_SIZE;
140 if off + 8 > meta.len() {
141 return Err(format!("meta.bin truncated at depth {}", d));
142 }
143 let start = u32::from_le_bytes(
144 meta[off..off + 4]
145 .try_into()
146 .map_err(|_| "meta.bin: bad depth range bytes")?,
147 );
148 let count = u32::from_le_bytes(
149 meta[off + 4..off + 8]
150 .try_into()
151 .map_err(|_| "meta.bin: bad depth range bytes")?,
152 );
153 *range = (start, count);
154 }
155
156 let hdr_file =
157 fs::File::open(&hdr_path).map_err(|e| format!("open microscope.bin: {}", e))?;
158 let headers =
160 unsafe { memmap2::Mmap::map(&hdr_file).map_err(|e| format!("mmap headers: {}", e))? };
161
162 #[cfg(feature = "compression")]
163 let data = {
164 let zst_path = output_dir.join("data.bin.zst");
165 if zst_path.exists()
166 && (!dat_path.exists()
167 || fs::metadata(&zst_path)
168 .and_then(|zm| {
169 fs::metadata(&dat_path).map(|dm| {
170 zm.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH)
171 > dm.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH)
172 })
173 })
174 .unwrap_or(false))
175 {
176 let compressed =
177 fs::read(&zst_path).map_err(|e| format!("read data.bin.zst: {}", e))?;
178 let decompressed = zstd::decode_all(std::io::Cursor::new(&compressed))
179 .map_err(|e| format!("zstd decompress: {}", e))?;
180 DataStore::InMemory(decompressed)
181 } else {
182 let dat_file =
183 fs::File::open(&dat_path).map_err(|e| format!("open data.bin: {}", e))?;
184 DataStore::Mmap(unsafe {
186 memmap2::Mmap::map(&dat_file).map_err(|e| format!("mmap data.bin: {}", e))?
187 })
188 }
189 };
190
191 #[cfg(not(feature = "compression"))]
192 let data = {
193 let dat_file =
194 fs::File::open(&dat_path).map_err(|e| format!("open data.bin: {}", e))?;
195 DataStore::Mmap(unsafe {
197 memmap2::Mmap::map(&dat_file).map_err(|e| format!("mmap data.bin: {}", e))?
198 })
199 };
200
201 Ok(MicroscopeReader {
202 headers,
203 data,
204 block_count,
205 depth_ranges,
206 })
207 }
208
209 #[inline(always)]
210 pub fn header(&self, i: usize) -> &BlockHeader {
211 debug_assert!(i < self.block_count);
212 unsafe { &*(self.headers.as_ptr().add(i * HEADER_SIZE) as *const BlockHeader) }
213 }
214
215 #[inline(always)]
216 pub fn text(&self, i: usize) -> &str {
217 let h = self.header(i);
218 let start = h.data_offset as usize;
219 let end = start + h.data_len as usize;
220 std::str::from_utf8(&self.data[start..end]).unwrap_or("<bin>")
221 }
222
223 pub fn look(
225 &self,
226 config: &Config,
227 x: f32,
228 y: f32,
229 z: f32,
230 zoom: u8,
231 k: usize,
232 ) -> Vec<(f32, usize, bool)> {
233 let (start, count) = self.depth_ranges[zoom as usize];
234 let (start, count) = (start as usize, count as usize);
235
236 let mut results: Vec<(f32, usize, bool)> = Vec::with_capacity(count + 10);
237 if count > 0 {
238 for i in start..(start + count) {
239 let h = self.header(i);
240 let dx = h.x - x;
241 let dy = h.y - y;
242 let dz = h.z - z;
243 results.push((dx * dx + dy * dy + dz * dz, i, true));
244 }
245 }
246
247 let append_path = Path::new(&config.paths.output_dir).join("append.bin");
248 let appended = read_append_log(&append_path);
249 for (ai, entry) in appended.iter().enumerate() {
250 if entry.depth != zoom {
251 continue;
252 }
253 let dx = entry.x - x;
254 let dy = entry.y - y;
255 let dz = entry.z - z;
256 results.push((dx * dx + dy * dy + dz * dz, ai + 1_000_000, false));
257 }
258
259 let k = k.min(results.len());
260 if k == 0 {
261 return vec![];
262 }
263 results.select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
264 results.truncate(k);
265 results.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
266 results
267 }
268
269 #[allow(clippy::too_many_arguments)]
271 pub fn look_soft(
272 &self,
273 config: &Config,
274 x: f32,
275 y: f32,
276 z: f32,
277 zoom: u8,
278 k: usize,
279 zw: f32,
280 ) -> Vec<(f32, usize, bool)> {
281 let qz = zoom as f32 / 8.0;
282 let mut results: Vec<(f32, usize, bool)> = (0..self.block_count)
283 .into_par_iter()
284 .map(|i| {
285 let h = self.header(i);
286 (l2_dist_sq_simd(h, x, y, z, qz, zw), i, true)
287 })
288 .collect();
289
290 let append_path = Path::new(&config.paths.output_dir).join("append.bin");
291 let appended = read_append_log(&append_path);
292 for (ai, entry) in appended.iter().enumerate() {
293 let dx = entry.x - x;
294 let dy = entry.y - y;
295 let dz = entry.z - z;
296 let entry_zoom = entry.depth as f32 / 8.0;
297 let dw = (entry_zoom - qz) * zw;
298 results.push((dx * dx + dy * dy + dz * dz + dw * dw, ai + 1_000_000, false));
299 }
300
301 let k = k.min(results.len());
302 if k == 0 {
303 return vec![];
304 }
305 results.select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
306 results.truncate(k);
307 results.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
308 results
309 }
310
311 #[allow(clippy::too_many_arguments)]
314 pub fn radial_search(
315 &self,
316 config: &Config,
317 x: f32,
318 y: f32,
319 z: f32,
320 depth: u8,
321 radius: f32,
322 k: usize,
323 ) -> ResultSet {
324 let radius_sq = radius * radius;
325 let (start, count) = self.depth_ranges[depth as usize];
326 let (start, count) = (start as usize, count as usize);
327
328 let mut candidates: Vec<(f32, usize, bool)> = if count > 0 {
330 (start..(start + count))
331 .into_par_iter()
332 .filter_map(|i| {
333 let h = self.header(i);
334 let qz = depth as f32 / 8.0;
335 let dist_sq = l2_dist_sq_simd(h, x, y, z, qz, 0.0); if dist_sq <= radius_sq {
337 Some((dist_sq, i, true))
338 } else {
339 None
340 }
341 })
342 .collect()
343 } else {
344 Vec::new()
345 };
346
347 let append_path = Path::new(&config.paths.output_dir).join("append.bin");
349 let appended = read_append_log(&append_path);
350 for (ai, entry) in appended.iter().enumerate() {
351 if entry.depth != depth {
352 continue;
353 }
354 let dx = entry.x - x;
355 let dy = entry.y - y;
356 let dz = entry.z - z;
357 let dist_sq = dx * dx + dy * dy + dz * dz;
358 if dist_sq <= radius_sq {
359 candidates.push((dist_sq, ai + 1_000_000, false));
360 }
361 }
362
363 candidates.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
364
365 let primary = candidates
367 .first()
368 .map(|&(dist, idx, is_main)| RadialResult {
369 block_idx: idx,
370 dist_sq: dist,
371 weight: 1.0,
372 is_main,
373 });
374
375 let neighbors: Vec<RadialResult> = candidates
376 .iter()
377 .skip(1)
378 .take(k.saturating_sub(1))
379 .map(|&(dist_sq, idx, is_main)| {
380 let weight = if dist_sq > 0.0001 {
382 (radius_sq - dist_sq) / radius_sq
383 } else {
384 1.0
385 };
386 RadialResult {
387 block_idx: idx,
388 dist_sq,
389 weight,
390 is_main,
391 }
392 })
393 .collect();
394
395 let total_within_radius = candidates.len();
396
397 ResultSet {
398 primary,
399 neighbors,
400 center: (x, y, z),
401 depth,
402 radius,
403 total_within_radius,
404 }
405 }
406
407 pub fn find_text(&self, query: &str, k: usize) -> Vec<(u8, usize)> {
409 let q = query.to_lowercase();
410 let mut results: Vec<(u8, usize)> = (0..self.block_count)
411 .into_par_iter()
412 .filter_map(|i| {
413 if self.text(i).to_lowercase().contains(&q) {
414 Some((self.header(i).depth, i))
415 } else {
416 None
417 }
418 })
419 .collect();
420
421 results.sort_by_key(|&(d, _)| d);
422 results.truncate(k);
423 results
424 }
425
426 pub fn print_result(&self, i: usize, dist: f32) {
427 let h = self.header(i);
428 let text = self.text(i);
429 let layer = LAYER_NAMES.get(h.layer_id as usize).unwrap_or(&"?");
430 let preview: String = text.chars().take(70).filter(|&c| c != '\n').collect();
431 println!(
432 " {} {} {} {}",
433 format!("D{}", h.depth).cyan(),
434 format!("L2={:.5}", dist).yellow(),
435 format!("[{}/{}]", layer, layer_color(h.layer_id)).green(),
436 preview
437 );
438 }
439}
440
/// One record decoded from the append log (`append.bin`).
// Not every field is read by the code in this module.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct AppendEntry {
    /// Stored text, decoded lossily from the record's bytes.
    pub text: String,
    /// Layer identifier (index into the layer name table).
    pub layer_id: u8,
    /// Importance value supplied when the entry was stored.
    pub importance: u8,
    /// Depth level; records from logs without the `APv2` magic get depth 4.
    pub depth: u8,
    /// X coordinate used by the distance searches.
    pub x: f32,
    /// Y coordinate used by the distance searches.
    pub y: f32,
    /// Z coordinate used by the distance searches.
    pub z: f32,
}
453
454pub fn read_append_log(path: &Path) -> Vec<AppendEntry> {
455 if !path.exists() {
456 return vec![];
457 }
458 let data = fs::read(path).unwrap_or_default();
459 if data.is_empty() {
460 return vec![];
461 }
462
463 let mut entries = Vec::new();
464 let mut pos = 0;
465
466 let is_v2 = data.len() >= 4 && &data[0..4] == b"APv2";
467 if is_v2 {
468 pos = 4;
469 }
470
471 let header_size = if is_v2 { 19 } else { 18 };
472
473 while pos + header_size <= data.len() {
474 let len = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
475 let lid = data[pos + 4];
476 let imp = data[pos + 5];
477
478 let (depth, coords_start) = if is_v2 {
479 (data[pos + 6], pos + 7)
480 } else {
481 (4u8, pos + 6)
482 };
483
484 let x = f32::from_le_bytes(data[coords_start..coords_start + 4].try_into().unwrap());
485 let y = f32::from_le_bytes(data[coords_start + 4..coords_start + 8].try_into().unwrap());
486 let z = f32::from_le_bytes(
487 data[coords_start + 8..coords_start + 12]
488 .try_into()
489 .unwrap(),
490 );
491 pos += header_size;
492 if pos + len > data.len() {
493 break;
494 }
495 let text = String::from_utf8_lossy(&data[pos..pos + len]).to_string();
496 pos += len;
497 entries.push(AppendEntry {
498 text,
499 layer_id: lid,
500 importance: imp,
501 depth,
502 x,
503 y,
504 z,
505 });
506 }
507 entries
508}
509
510pub fn print_append_result(appended: &[AppendEntry], idx: usize, dist: f32) {
512 let ai = idx - 1_000_000;
513 if ai < appended.len() {
514 let e = &appended[ai];
515 let layer = LAYER_NAMES.get(e.layer_id as usize).unwrap_or(&"?");
516 println!(
517 " {} {} {} {}",
518 format!("D{}", e.depth).cyan(),
519 format!("L2={:.5}", dist).yellow(),
520 format!("[{}/new]", layer).green(),
521 safe_truncate(&e.text, 70)
522 );
523 }
524}
525
/// A single hit produced by a radial search.
#[derive(Debug, Clone)]
pub struct RadialResult {
    /// Result index: a main-index block index, or `1_000_000 + position` for
    /// an append-log entry.
    pub block_idx: usize,
    /// Squared distance from the search centre.
    pub dist_sq: f32,
    /// Relevance weight: `1.0` for the primary hit, otherwise a falloff
    /// derived from the squared distance and the squared radius.
    pub weight: f32,
    /// `true` for a main-index block, `false` for an append-log entry.
    pub is_main: bool,
}
536
537#[derive(Debug)]
539pub struct ResultSet {
540 pub primary: Option<RadialResult>,
541 pub neighbors: Vec<RadialResult>,
542 pub center: (f32, f32, f32),
543 pub depth: u8,
544 pub radius: f32,
545 pub total_within_radius: usize,
546}
547
548impl ResultSet {
549 pub fn all(&self) -> Vec<&RadialResult> {
551 let mut v = Vec::with_capacity(1 + self.neighbors.len());
552 if let Some(ref p) = self.primary {
553 v.push(p);
554 }
555 v.extend(self.neighbors.iter());
556 v
557 }
558
559 pub fn block_indices(&self) -> Vec<(u32, f32)> {
561 self.all()
562 .iter()
563 .map(|r| (r.block_idx as u32, r.weight))
564 .collect()
565 }
566}
567
568pub fn store_memory(
569 config: &Config,
570 text: &str,
571 layer: &str,
572 importance: u8,
573) -> Result<(), String> {
574 let t0 = std::time::Instant::now();
575 let (x, y, z) = content_coords_blended(text, layer, config.search.semantic_weight);
576 let lid = layer_to_id(layer);
577 let depth = auto_depth(text);
578
579 let append_path = Path::new(&config.paths.output_dir).join("append.bin");
580
581 let needs_magic = !append_path.exists()
582 || fs::metadata(&append_path)
583 .map(|m| m.len() == 0)
584 .unwrap_or(true);
585
586 let mut file = fs::OpenOptions::new()
587 .create(true)
588 .append(true)
589 .open(&append_path)
590 .map_err(|e| format!("open append log: {}", e))?;
591
592 let write = |f: &mut fs::File, data: &[u8]| -> Result<(), String> {
593 f.write_all(data)
594 .map_err(|e| format!("write append log: {}", e))
595 };
596
597 if needs_magic {
598 write(&mut file, b"APv2")?;
599 }
600
601 let text_bytes = text.as_bytes();
602 let len = text_bytes.len().min(BLOCK_DATA_SIZE);
603
604 write(&mut file, &(len as u32).to_le_bytes())?;
605 write(&mut file, &[lid])?;
606 write(&mut file, &[importance])?;
607 write(&mut file, &[depth])?;
608 write(&mut file, &x.to_le_bytes())?;
609 write(&mut file, &y.to_le_bytes())?;
610 write(&mut file, &z.to_le_bytes())?;
611 write(&mut file, &text_bytes[..len])?;
612
613 let elapsed = t0.elapsed();
614 println!(
615 " {} D{} [{}/{}] ({:.3},{:.3},{:.3}) {}",
616 "STORED".green().bold(),
617 depth,
618 layer,
619 layer_color(lid),
620 x,
621 y,
622 z,
623 safe_truncate(text, 60)
624 );
625 println!(" {} ns", elapsed.as_nanos());
626 Ok(())
627}