1use crate::config::Config;
6use crate::reader::{BlockHeader, MicroscopeReader};
7use crate::{
8 content_coords_blended, crc16_ccitt, hex_str, layer_to_id, merkle, safe_truncate, to_block,
9 BLOCK_DATA_SIZE, DEPTH_ENTRY_SIZE, HEADER_SIZE, META_HEADER_SIZE,
10};
11
12use colored::Colorize;
13use rayon::prelude::*;
14use sha2::{Digest, Sha256};
15use std::fs;
16use std::io::{BufWriter, Seek, Write};
17use std::path::Path;
18
/// One block of the memory hierarchy, held in memory until it is sorted by
/// depth and serialized into `microscope.bin` / `data.bin`.
struct RawBlock {
    data: Vec<u8>,    // payload bytes produced by `to_block` (writer caps at BLOCK_DATA_SIZE)
    depth: u8,        // hierarchy depth: 0 (identity root) through 8 (hex bytes)
    x: f32,           // spatial coordinate used for search
    y: f32,           // spatial coordinate used for search
    z: f32,           // spatial coordinate used for search
    layer_id: u8,     // id of the originating memory layer (from `layer_to_id`)
    parent_idx: u32,  // index into the pre-sort `blocks` vec; u32::MAX = no parent
    child_count: u16, // number of direct children one depth below
}
30
31fn extract_texts_from_file(path: &Path) -> Vec<String> {
36 let mut texts = Vec::new();
37 let raw = match fs::read_to_string(path) {
38 Ok(s) => s,
39 Err(_) => return texts,
40 };
41
42 for chunk in raw.split("\n\n") {
44 let trimmed = chunk.trim();
45 if trimmed.len() > 3 {
46 texts.push(trimmed.to_string());
47 }
48 }
49
50 if texts.len() < 2 {
52 texts.clear();
53 let chars: Vec<char> = raw.chars().collect();
54 for chunk in chars.chunks(BLOCK_DATA_SIZE) {
55 let s: String = chunk.iter().collect();
56 if s.trim().len() > 5 {
57 texts.push(s);
58 }
59 }
60 }
61
62 texts
63}
64
/// Splits `text` into sentence-like fragments.
///
/// A fragment is emitted (trimmed) whenever a terminator character
/// (`.`, `!`, `?`, or newline) is seen and the accumulated buffer is longer
/// than 10 bytes. Any trailing remainder longer than 5 bytes (trimmed) is
/// emitted as a final fragment; shorter tails are dropped.
fn split_sentences(text: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut buf = String::new();
    for ch in text.chars() {
        buf.push(ch);
        // Byte length, not char count — matches the original threshold.
        let ends_sentence = matches!(ch, '.' | '!' | '?' | '\n') && buf.len() > 10;
        if ends_sentence {
            out.push(buf.trim().to_string());
            buf.clear();
        }
    }
    if buf.trim().len() > 5 {
        out.push(buf.trim().to_string());
    }
    out
}
81
82pub fn compute_layers_hash(config: &Config) -> [u8; 32] {
84 let layers_dir = Path::new(&config.paths.layers_dir);
85 let layer_files = &config.memory_layers.layers;
86 let mut sorted_names: Vec<&String> = layer_files.iter().collect();
87 sorted_names.sort();
88 let mut hasher = Sha256::new();
89 for name in &sorted_names {
90 let path = layers_dir.join(format!("{}.txt", name));
91 if let Ok(contents) = fs::read(&path) {
92 hasher.update(&contents);
93 }
94 }
95 let result = hasher.finalize();
96 let mut hash = [0u8; 32];
97 hash.copy_from_slice(&result);
98 hash
99}
100
/// Builds the full binary microscope from the raw layer text files.
///
/// Pipeline:
///   1. Hash the layer files; skip the rebuild when `meta.bin` already
///      records the same hash (unless `force` is set).
///   2. Extract texts per layer and expand them into a 9-depth hierarchy of
///      `RawBlock`s (depth 0 = identity root, 1 = layer summaries,
///      2 = clusters of five texts, 3 = raw texts, 4 = sentences,
///      5 = tokens, 6 = character chunks, 7 = single characters,
///      8 = hex-encoded bytes). Depths 4-8 are produced in parallel.
///   3. Sort blocks by depth, remap parent indices, and write
///      `microscope.bin` (fixed-size headers), `data.bin` (payloads),
///      `merkle.bin`, and `meta.bin` into the output directory.
///   4. Optionally zstd-compress the data, build the embedding index,
///      re-apply pending Hebbian drift, and rebuild the fingerprint table.
///
/// Returns `Err(String)` describing the first I/O or sub-build failure.
pub fn build(config: &Config, force: bool) -> Result<(), String> {
    let layers_hash = compute_layers_hash(config);

    // Fast path: meta.bin carries the "MSC3" magic and stores the layers
    // hash at bytes 120..152; if it matches, the inputs are unchanged and
    // the existing build can be kept as-is.
    if !force {
        let output_dir = Path::new(&config.paths.output_dir);
        let meta_path = output_dir.join("meta.bin");
        if let Ok(meta) = fs::read(&meta_path) {
            if meta.len() >= 152 && &meta[0..4] == b"MSC3" {
                let stored_hash = &meta[120..152];
                if stored_hash == &layers_hash[..] {
                    println!("{}", "Layers unchanged — skipping rebuild".green().bold());
                    return Ok(());
                }
            }
        }
    }

    println!(
        "{}",
        "Building microscope from raw layers (zero JSON)..."
            .cyan()
            .bold()
    );

    let layers_dir = Path::new(&config.paths.layers_dir);
    let output_dir = Path::new(&config.paths.output_dir);

    if !output_dir.exists() {
        fs::create_dir_all(output_dir).map_err(|e| format!("create output dir: {}", e))?;
    }

    let layer_files = &config.memory_layers.layers;

    // Load every layer's texts up front: (layer name, extracted texts).
    let mut layer_texts: Vec<(String, Vec<String>)> = Vec::new();
    for name in layer_files {
        let path = layers_dir.join(format!("{}.txt", name));
        let texts = extract_texts_from_file(&path);
        println!(" {} {}: {} items", ">".green(), name, texts.len());
        layer_texts.push((name.clone(), texts));
    }

    let mut blocks: Vec<RawBlock> = Vec::new();

    // ---- Depth 0: single identity/root block at a fixed position. ----
    let identity = "Microscope Memory: 9-depth hierarchical cognitive engine. Binary mmap, sub-microsecond spatial search, Hebbian learning, Merkle integrity.";
    blocks.push(RawBlock {
        data: to_block(identity),
        depth: 0,
        x: 0.25,
        y: 0.25,
        z: 0.25,
        layer_id: 0,
        parent_idx: u32::MAX, // root has no parent
        child_count: layer_files.len() as u16,
    });

    let sw = config.search.semantic_weight;
    // ---- Depth 1: one summary block per layer, parented to the root. ----
    let depth1_start = blocks.len();
    for (name, texts) in &layer_texts {
        let preview: Vec<String> = texts.iter().take(3).map(|s| safe_truncate(s, 40)).collect();
        let summary = format!("[{}] {} elem. {}", name, texts.len(), preview.join(" | "));
        let (x, y, z) = content_coords_blended(name, name, sw);
        blocks.push(RawBlock {
            data: to_block(&summary),
            depth: 1,
            x,
            y,
            z,
            layer_id: layer_to_id(name),
            parent_idx: 0, // root is always block 0
            // Depth-2 children arrive in clusters of five texts each.
            child_count: texts.len().div_ceil(5) as u16,
        });
    }

    // ---- Depth 2: cluster summaries (five texts per cluster). ----
    let _depth2_start = blocks.len();
    // Per layer: (index of its first depth-2 block, number of clusters).
    let mut depth2_layer_offsets: Vec<(usize, usize)> = Vec::new();
    for (li, (name, texts)) in layer_texts.iter().enumerate() {
        let cluster_start = blocks.len();
        for ci in (0..texts.len()).step_by(5) {
            let chunk: Vec<String> = texts[ci..texts.len().min(ci + 5)]
                .iter()
                .map(|s| safe_truncate(s, 40))
                .collect();
            let summary = format!("[{} #{}] {}", name, ci / 5, chunk.join(" | "));
            let (x, y, z) = content_coords_blended(&summary, name, sw);
            blocks.push(RawBlock {
                data: to_block(&summary),
                depth: 2,
                x,
                y,
                z,
                layer_id: layer_to_id(name),
                parent_idx: (depth1_start + li) as u32,
                child_count: chunk.len() as u16,
            });
        }
        depth2_layer_offsets.push((cluster_start, blocks.len() - cluster_start));
    }

    // ---- Depth 3: one block per raw text, parented to its cluster. ----
    let depth3_start = blocks.len();
    let mut depth3_positions: Vec<(f32, f32, f32)> = Vec::new();
    for (li, (name, texts)) in layer_texts.iter().enumerate() {
        for (ti, text) in texts.iter().enumerate() {
            let (x, y, z) = content_coords_blended(text, name, sw);
            let cluster_idx = ti / 5;
            let (d2_start, d2_count) = depth2_layer_offsets[li];
            // Defensive: orphan the block if the cluster index is somehow
            // out of range for this layer.
            let parent = if cluster_idx < d2_count {
                (d2_start + cluster_idx) as u32
            } else {
                u32::MAX
            };

            blocks.push(RawBlock {
                data: to_block(text),
                depth: 3,
                x,
                y,
                z,
                layer_id: layer_to_id(name),
                parent_idx: parent,
                child_count: 0, // filled in after depth-4 generation
            });
            depth3_positions.push((x, y, z));
        }
    }

    // ---- Depth 4: sentences, generated in parallel per depth-3 block.
    // Each child sits near its parent, jittered by a hash of its text
    // (offset magnitude shrinks by ~10x per depth so children stay local).
    let _depth4_start = blocks.len();
    let mut depth4_parents: Vec<usize> = Vec::new();

    let d4_results: Vec<Vec<RawBlock>> = (depth3_start..(depth3_start + depth3_positions.len()))
        .into_par_iter()
        .map(|d3i| {
            // Non-UTF-8 payloads produce no sentences.
            let text = std::str::from_utf8(&blocks[d3i].data).unwrap_or("");
            let sentences = split_sentences(text);
            let mut local_blocks = Vec::new();
            for sent in &sentences {
                if sent.len() < 10 {
                    continue;
                }
                let (px, py, pz) = depth3_positions[d3i - depth3_start];
                let h = sent
                    .as_bytes()
                    .iter()
                    .fold(0u64, |a, &b| a.wrapping_mul(31).wrapping_add(b as u64));
                let ox = ((h & 0xFF) as f32 - 128.0) / 25500.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 25500.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 25500.0;

                local_blocks.push(RawBlock {
                    data: to_block(sent),
                    depth: 4,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: blocks[d3i].layer_id,
                    parent_idx: d3i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    // Append the depth-4 blocks sequentially (order matches d3 parents) and
    // back-fill each parent's child_count.
    for (i, local) in d4_results.into_iter().enumerate() {
        let d3i = depth3_start + i;
        blocks[d3i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth4_parents.push(blocks.len() - 1);
        }
    }

    // ---- Depth 5: up to 8 whitespace tokens per sentence. ----
    let mut depth5_parents: Vec<usize> = Vec::new();
    let depth4_parents_clone = depth4_parents.clone();
    let d5_results: Vec<Vec<RawBlock>> = depth4_parents
        .into_par_iter()
        .map(|d4i| {
            let text_owned = String::from_utf8_lossy(&blocks[d4i].data).to_string();
            let px = blocks[d4i].x;
            let py = blocks[d4i].y;
            let pz = blocks[d4i].z;
            let lid = blocks[d4i].layer_id;

            let tokens: Vec<String> = text_owned
                .split_whitespace()
                .take(8)
                .map(|s| s.to_string())
                .collect();
            let mut local_blocks = Vec::new();
            for tok in &tokens {
                if tok.len() < 2 {
                    continue;
                }
                let h = tok
                    .as_bytes()
                    .iter()
                    .fold(0u64, |a, &b| a.wrapping_mul(31).wrapping_add(b as u64));
                // 10x smaller jitter than depth 4.
                let ox = ((h & 0xFF) as f32 - 128.0) / 255000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 255000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 255000.0;

                local_blocks.push(RawBlock {
                    data: to_block(tok),
                    depth: 5,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d4i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d5_results.into_iter().enumerate() {
        let d4i = depth4_parents_clone[i];
        blocks[d4i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth5_parents.push(blocks.len() - 1);
        }
    }

    // ---- Depth 6: syllable-like character chunks of each token. ----
    let mut depth6_parents: Vec<usize> = Vec::new();
    let d6_results: Vec<Vec<RawBlock>> = depth5_parents
        .clone()
        .into_par_iter()
        .map(|d5i| {
            let text_owned = String::from_utf8_lossy(&blocks[d5i].data).to_string();
            let px = blocks[d5i].x;
            let py = blocks[d5i].y;
            let pz = blocks[d5i].z;
            let lid = blocks[d5i].layer_id;

            let chars: Vec<char> = text_owned.chars().collect();
            if chars.len() < 3 {
                return vec![];
            }
            // Chunk size clamped to [3, 5], scaled to roughly a third of the
            // token length.
            let chunk_size = 3.max(chars.len() / 3).min(5);
            let mut local_blocks = Vec::new();
            for chunk in chars.chunks(chunk_size) {
                let syl: String = chunk.iter().collect();
                if syl.trim().is_empty() {
                    continue;
                }
                let h = syl
                    .as_bytes()
                    .iter()
                    .fold(0u64, |a, &b| a.wrapping_mul(37).wrapping_add(b as u64));
                let ox = ((h & 0xFF) as f32 - 128.0) / 2550000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 2550000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 2550000.0;

                local_blocks.push(RawBlock {
                    data: to_block(&syl),
                    depth: 6,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d5i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d6_results.into_iter().enumerate() {
        let d5i = depth5_parents[i];
        blocks[d5i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth6_parents.push(blocks.len() - 1);
        }
    }

    // ---- Depth 7: individual non-whitespace characters. ----
    let mut depth7_parents: Vec<usize> = Vec::new();
    let d7_results: Vec<Vec<RawBlock>> = depth6_parents
        .clone()
        .into_par_iter()
        .map(|d6i| {
            let text_owned = String::from_utf8_lossy(&blocks[d6i].data).to_string();
            let px = blocks[d6i].x;
            let py = blocks[d6i].y;
            let pz = blocks[d6i].z;
            let lid = blocks[d6i].layer_id;

            let mut local_blocks = Vec::new();
            for ch in text_owned.chars() {
                if ch.is_whitespace() {
                    continue;
                }
                let h = (ch as u64).wrapping_mul(0x517cc1b727220a95);
                let ox = ((h & 0xFF) as f32 - 128.0) / 25500000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 25500000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 25500000.0;

                let ch_str = ch.to_string();
                local_blocks.push(RawBlock {
                    data: to_block(&ch_str),
                    depth: 7,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d6i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d7_results.into_iter().enumerate() {
        let d6i = depth6_parents[i];
        blocks[d6i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth7_parents.push(blocks.len() - 1);
        }
    }

    // ---- Depth 8: hex-encoded bytes of each character (leaf level). ----
    let d8_results: Vec<Vec<RawBlock>> = depth7_parents
        .clone()
        .into_par_iter()
        .map(|d7i| {
            let text_owned = String::from_utf8_lossy(&blocks[d7i].data).to_string();
            let px = blocks[d7i].x;
            let py = blocks[d7i].y;
            let pz = blocks[d7i].z;
            let lid = blocks[d7i].layer_id;

            let bytes = text_owned.as_bytes();
            let mut local_blocks = Vec::new();
            for &byte in bytes {
                let hex = format!("0x{:02X}", byte);
                let h = (byte as u64).wrapping_mul(0x9E3779B97F4A7C15);
                let ox = ((h & 0xFF) as f32 - 128.0) / 255000000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 255000000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 255000000.0;

                local_blocks.push(RawBlock {
                    data: to_block(&hex),
                    depth: 8,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d7i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d8_results.into_iter().enumerate() {
        let d7i = depth7_parents[i];
        blocks[d7i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
        }
    }

    let n = blocks.len();
    println!("\n {} blocks total", n);

    // Reorder all blocks by depth (stable sort preserves insertion order
    // within a depth) so each depth occupies one contiguous range on disk.
    let mut indices: Vec<usize> = (0..n).collect();
    indices.sort_by_key(|&i| blocks[i].depth);

    // Map pre-sort index -> post-sort index, needed to rewrite parent_idx.
    let mut old_to_new = vec![0u32; n];
    for (new_i, &old_i) in indices.iter().enumerate() {
        old_to_new[old_i] = new_i as u32;
    }

    let output_dir = Path::new(&config.paths.output_dir);
    fs::create_dir_all(output_dir).ok();

    let hdr_path = output_dir.join("microscope.bin");
    let dat_path = output_dir.join("data.bin");
    let meta_path = output_dir.join("meta.bin");

    let mut hdr_file = BufWriter::new(
        fs::File::create(&hdr_path).map_err(|e| format!("create microscope.bin: {}", e))?,
    );
    let mut dat_file =
        BufWriter::new(fs::File::create(&dat_path).map_err(|e| format!("create data.bin: {}", e))?);

    // (start, count) header-index ranges, one entry per depth 0..=8.
    let mut depth_ranges: Vec<(u32, u32)> = vec![(0, 0); 9];
    let mut cur_depth: u8 = 0;
    let mut range_start: u32 = 0;

    for (new_i, &old_i) in indices.iter().enumerate() {
        let b = &blocks[old_i];
        // Payload goes to data.bin; its offset/length are recorded in the
        // header. Payloads are capped at BLOCK_DATA_SIZE bytes.
        let offset = dat_file
            .stream_position()
            .map_err(|e| format!("data.bin stream_position: {}", e))? as u32;
        let len = b.data.len().min(BLOCK_DATA_SIZE) as u16;
        dat_file
            .write_all(&b.data[..len as usize])
            .map_err(|e| format!("write data.bin: {}", e))?;

        // Rewrite the parent index into post-sort coordinates.
        let parent = if b.parent_idx == u32::MAX {
            u32::MAX
        } else {
            old_to_new[b.parent_idx as usize]
        };

        let crc = crc16_ccitt(&b.data[..len as usize]);
        let hdr = BlockHeader {
            x: b.x,
            y: b.y,
            z: b.z,
            zoom: b.depth as f32 / 8.0, // normalized depth in [0, 1]
            depth: b.depth,
            layer_id: b.layer_id,
            data_offset: offset,
            data_len: len,
            parent_idx: parent,
            child_count: b.child_count,
            crc16: crc.to_le_bytes(),
        };

        // SAFETY: reinterprets the header struct as raw bytes. Sound only if
        // BlockHeader is a fixed-layout type (e.g. #[repr(C, packed)]) whose
        // size is exactly HEADER_SIZE — assumed from the reader module; the
        // byte offsets used when parsing below (18 for data_offset, 22 for
        // data_len) rely on the same layout. TODO(review): confirm repr.
        let bytes: &[u8] = unsafe {
            std::slice::from_raw_parts(&hdr as *const BlockHeader as *const u8, HEADER_SIZE)
        };
        hdr_file
            .write_all(bytes)
            .map_err(|e| format!("write microscope.bin: {}", e))?;

        // Close out the previous depth's range when the depth changes.
        // NOTE(review): if a depth is entirely empty its entry stays (0, 0).
        if b.depth != cur_depth {
            depth_ranges[cur_depth as usize] = (range_start, new_i as u32 - range_start);
            range_start = new_i as u32;
            cur_depth = b.depth;
        }
    }
    depth_ranges[cur_depth as usize] = (range_start, n as u32 - range_start);
    hdr_file
        .flush()
        .map_err(|e| format!("flush microscope.bin: {}", e))?;
    dat_file
        .flush()
        .map_err(|e| format!("flush data.bin: {}", e))?;

    // Optional zstd compression of the payload file (level 3), written as a
    // sidecar data.bin.zst; the uncompressed file is kept.
    #[cfg(feature = "compression")]
    if config.performance.compression {
        let raw_data =
            fs::read(&dat_path).map_err(|e| format!("read data.bin for compression: {}", e))?;
        let raw_size = raw_data.len();
        let compressed = zstd::encode_all(std::io::Cursor::new(&raw_data), 3)
            .map_err(|e| format!("zstd compress: {}", e))?;
        let comp_size = compressed.len();
        let zst_path = output_dir.join("data.bin.zst");
        fs::write(&zst_path, &compressed).map_err(|e| format!("write data.bin.zst: {}", e))?;
        let ratio = if comp_size > 0 {
            raw_size as f64 / comp_size as f64
        } else {
            0.0
        };
        println!(
            " {}: {} → {} bytes ({:.1}x ratio)",
            "zstd".green(),
            raw_size,
            comp_size,
            ratio,
        );
    }

    // ---- Merkle tree over every block's payload slice. ----
    let merkle_path = output_dir.join("merkle.bin");
    // Redundant with the flushes above; harmless.
    hdr_file
        .flush()
        .map_err(|e| format!("flush microscope.bin: {}", e))?;
    dat_file
        .flush()
        .map_err(|e| format!("flush data.bin: {}", e))?;

    let dat_bytes = fs::read(&dat_path).map_err(|e| format!("read data.bin for merkle: {}", e))?;
    let hdr_bytes =
        fs::read(&hdr_path).map_err(|e| format!("read microscope.bin for merkle: {}", e))?;
    let mut leaf_slices: Vec<&[u8]> = Vec::with_capacity(n);
    for i in 0..n {
        let hdr_off = i * HEADER_SIZE;
        // Offsets 18/22 = after x,y,z,zoom (4 x f32 = 16) + depth + layer_id.
        let data_offset =
            u32::from_le_bytes(hdr_bytes[hdr_off + 18..hdr_off + 22].try_into().unwrap()) as usize;
        let data_len =
            u16::from_le_bytes(hdr_bytes[hdr_off + 22..hdr_off + 24].try_into().unwrap()) as usize;
        if data_offset + data_len <= dat_bytes.len() {
            leaf_slices.push(&dat_bytes[data_offset..data_offset + data_len]);
        } else {
            // Corrupt/out-of-range header: hash an empty leaf instead of
            // panicking on the slice.
            leaf_slices.push(&[]);
        }
    }

    let merkle_tree = merkle::MerkleTree::build(&leaf_slices);
    fs::write(&merkle_path, merkle_tree.to_bytes())
        .map_err(|e| format!("write merkle.bin: {}", e))?;
    println!(
        " {}: {} leaves, root={}",
        "merkle".green(),
        merkle_tree.leaf_count,
        hex_str(&merkle_tree.root)
    );

    // ---- meta.bin layout: "MSC3" magic (4) | version=3 (4) | block count
    // (4) | depth count=9 (4) | 9 x (start, count) u32 pairs (72) | merkle
    // root (32, at 88..120) | layers hash (32, at 120..152). ----
    let mut meta_buf = Vec::with_capacity(META_HEADER_SIZE + 9 * DEPTH_ENTRY_SIZE + 32 + 32);
    meta_buf.extend_from_slice(b"MSC3");
    meta_buf.extend_from_slice(&3u32.to_le_bytes());
    meta_buf.extend_from_slice(&(n as u32).to_le_bytes());
    meta_buf.extend_from_slice(&9u32.to_le_bytes());
    for &(start, count) in &depth_ranges {
        meta_buf.extend_from_slice(&start.to_le_bytes());
        meta_buf.extend_from_slice(&count.to_le_bytes());
    }
    meta_buf.extend_from_slice(&merkle_tree.root);
    meta_buf.extend_from_slice(&layers_hash);
    fs::write(meta_path, &meta_buf).map_err(|e| format!("write meta.bin: {}", e))?;

    // ---- Size / cache-fit report. ----
    let hdr_size = n * HEADER_SIZE;
    let dat_size = dat_file.stream_position().unwrap_or(0) as usize;
    let meta_size = meta_buf.len();
    println!(
        "\n {}: {} bytes ({:.1} KB)",
        "headers".green(),
        hdr_size,
        hdr_size as f64 / 1024.0
    );
    println!(
        " {}: {} bytes ({:.1} KB)",
        "data".green(),
        dat_size,
        dat_size as f64 / 1024.0
    );
    println!(" {}: {} bytes", "meta".green(), meta_size);
    println!(
        " {}: {:.1} KB",
        "TOTAL".yellow().bold(),
        (hdr_size + dat_size + meta_size) as f64 / 1024.0
    );

    // Which CPU cache level the header array fits in (typical sizes).
    let fits = if hdr_size < 32768 {
        "L1d (32KB)"
    } else if hdr_size < 262144 {
        "L2 (256KB)"
    } else {
        "L3"
    };
    println!(" cache: {}", fits.green().bold());

    for (d, &(_start, count)) in depth_ranges.iter().enumerate() {
        println!(" Depth {}: {:>5} blocks", d, count);
    }

    // ---- Optional embedding index over the freshly written files. ----
    if config.embedding.provider != "none" {
        println!("\n Building embedding index...");
        let emb_path = output_dir.join("embeddings.bin");
        let reader = MicroscopeReader::open(config)?;
        let max_depth = config.embedding.max_depth;

        // With the "embeddings" feature: try the real Candle provider and
        // degrade to the mock on init failure; otherwise always mock.
        #[cfg(feature = "embeddings")]
        let provider: Box<dyn crate::embeddings::EmbeddingProvider> =
            if config.embedding.provider == "candle" {
                match crate::embeddings::CandleEmbeddingProvider::new(&config.embedding.model) {
                    Ok(p) => Box::new(p),
                    Err(e) => {
                        eprintln!(
                            " {} Candle init failed: {:?}, using mock",
                            "WARN".yellow(),
                            e
                        );
                        Box::new(crate::embeddings::MockEmbeddingProvider::new(
                            config.embedding.dim,
                        ))
                    }
                }
            } else {
                Box::new(crate::embeddings::MockEmbeddingProvider::new(
                    config.embedding.dim,
                ))
            };

        #[cfg(not(feature = "embeddings"))]
        let provider: Box<dyn crate::embeddings::EmbeddingProvider> = Box::new(
            crate::embeddings::MockEmbeddingProvider::new(config.embedding.dim),
        );

        // Embedding failure is reported but does not fail the build.
        match crate::embedding_index::build_embedding_index(
            &*provider, &reader, max_depth, &emb_path,
        ) {
            Ok(()) => println!(" {} embeddings.bin built", "OK".green()),
            Err(e) => eprintln!(" {} embedding build: {}", "ERR".red(), e),
        }
    }

    // ---- Re-apply any accumulated Hebbian positional drift. ----
    let hebb_path = output_dir.join("activations.bin");
    if hebb_path.exists() {
        let hebb = crate::hebbian::HebbianState::load_or_init(output_dir, n);
        // Count blocks with non-negligible drift on any axis.
        let drifted = hebb
            .activations
            .iter()
            .filter(|r| {
                r.drift_x.abs() > 0.001 || r.drift_y.abs() > 0.001 || r.drift_z.abs() > 0.001
            })
            .count();

        if drifted > 0 {
            apply_hebbian_deltas(output_dir, &hebb, n)?;
            println!(
                " {} Hebbian deltas applied to {} blocks",
                "HEBBIAN".magenta(),
                drifted
            );
        }
    }

    // ---- Rebuild the fingerprint link table from all block texts. ----
    {
        let reader = MicroscopeReader::open(config)?;
        let texts: Vec<&str> = (0..reader.block_count).map(|i| reader.text(i)).collect();
        let table = crate::fingerprint::LinkTable::build(&texts);
        table.save(output_dir)?;
        let stats = table.stats();
        println!(
            " {} {} links across {} blocks",
            "FINGERPRINT".cyan(),
            stats.link_count,
            stats.block_count
        );
    }

    println!("\n{}", "ZERO JSON. Pure binary. Done.".green().bold());
    Ok(())
}
757
758fn apply_hebbian_deltas(
760 output_dir: &Path,
761 hebb: &crate::hebbian::HebbianState,
762 block_count: usize,
763) -> Result<(), String> {
764 let hdr_path = output_dir.join("microscope.bin");
765 let mut data = fs::read(&hdr_path).map_err(|e| format!("read microscope.bin: {}", e))?;
766
767 for i in 0..block_count.min(hebb.activations.len()) {
768 let rec = &hebb.activations[i];
769 if rec.drift_x.abs() < 0.001 && rec.drift_y.abs() < 0.001 && rec.drift_z.abs() < 0.001 {
770 continue;
771 }
772
773 let off = i * HEADER_SIZE;
774 if off + 12 > data.len() {
775 break;
776 }
777
778 let x = f32::from_le_bytes(data[off..off + 4].try_into().unwrap());
780 let y = f32::from_le_bytes(data[off + 4..off + 8].try_into().unwrap());
781 let z = f32::from_le_bytes(data[off + 8..off + 12].try_into().unwrap());
782
783 let new_x = x + rec.drift_x;
785 let new_y = y + rec.drift_y;
786 let new_z = z + rec.drift_z;
787
788 data[off..off + 4].copy_from_slice(&new_x.to_le_bytes());
789 data[off + 4..off + 8].copy_from_slice(&new_y.to_le_bytes());
790 data[off + 8..off + 12].copy_from_slice(&new_z.to_le_bytes());
791 }
792
793 fs::write(&hdr_path, &data).map_err(|e| format!("write microscope.bin: {}", e))?;
794
795 let mut hebb_clone = crate::hebbian::HebbianState {
797 activations: hebb.activations.clone(),
798 coactivations: hebb.coactivations.clone(),
799 fingerprints: hebb.fingerprints.clone(),
800 };
801 for rec in &mut hebb_clone.activations {
802 rec.drift_x = 0.0;
803 rec.drift_y = 0.0;
804 rec.drift_z = 0.0;
805 }
806 hebb_clone
807 .save(output_dir)
808 .map_err(|e| format!("save cleared Hebbian: {}", e))
809}