// microscope_memory/build.rs
1//! Extracts text from RAW TEXT layer files, constructs a 9-depth block hierarchy
2//! (identity → layers → clusters → items → sentences → tokens → syllables → chars → bytes),
3//! and writes the binary output files (microscope.bin, data.bin, meta.bin, merkle.bin, embeddings.bin).
4
5use crate::config::Config;
6use crate::reader::{BlockHeader, MicroscopeReader};
7use crate::{
8    content_coords_blended, crc16_ccitt, hex_str, layer_to_id, merkle, safe_truncate, to_block,
9    BLOCK_DATA_SIZE, DEPTH_ENTRY_SIZE, HEADER_SIZE, META_HEADER_SIZE,
10};
11
12use colored::Colorize;
13use rayon::prelude::*;
14use sha2::{Digest, Sha256};
15use std::fs;
16use std::io::{BufWriter, Seek, Write};
17use std::path::Path;
18
// ─── Internal block for building ─────────────────────
/// In-memory representation of one block while the hierarchy is assembled;
/// converted to an on-disk `BlockHeader` + payload slice at write time.
struct RawBlock {
    // Payload bytes produced by `to_block`; clamped to BLOCK_DATA_SIZE on write
    data: Vec<u8>,
    // Hierarchy depth 0..=8 (identity … raw bytes)
    depth: u8,
    // Spatial coordinates used for zoom/spatial search
    x: f32,
    y: f32,
    z: f32,
    // Source layer identifier (0 = identity root)
    layer_id: u8,
    // Index of the parent in build order; u32::MAX means "no parent"
    parent_idx: u32,
    // Number of direct children (filled in after children are generated)
    child_count: u16,
}
30
31// ─── Extract text values from RAW files ───────────────────
32// Zero JSON dependency. Standard UTF-8 text files.
33// Files are read and split into blocks by default.
34
35fn extract_texts_from_file(path: &Path) -> Vec<String> {
36    let mut texts = Vec::new();
37    let raw = match fs::read_to_string(path) {
38        Ok(s) => s,
39        Err(_) => return texts,
40    };
41
42    // Split by double newline or chunking
43    for chunk in raw.split("\n\n") {
44        let trimmed = chunk.trim();
45        if trimmed.len() > 3 {
46            texts.push(trimmed.to_string());
47        }
48    }
49
50    // Fallback if no doubles: chunk by size
51    if texts.len() < 2 {
52        texts.clear();
53        let chars: Vec<char> = raw.chars().collect();
54        for chunk in chars.chunks(BLOCK_DATA_SIZE) {
55            let s: String = chunk.iter().collect();
56            if s.trim().len() > 5 {
57                texts.push(s);
58            }
59        }
60    }
61
62    texts
63}
64
// ─── Split text into sentences ───────────────────────
/// Split `text` into sentence strings.
///
/// A sentence ends at `.`, `!`, `?` or `\n`, but only once the accumulated
/// buffer exceeds 10 bytes (so short fragments keep accumulating). Each
/// emitted sentence is trimmed; a trailing remainder is kept only when its
/// trimmed length exceeds 5 bytes.
fn split_sentences(text: &str) -> Vec<String> {
    const TERMINATORS: [char; 4] = ['.', '!', '?', '\n'];
    let mut out = Vec::new();
    let mut buf = String::new();
    for ch in text.chars() {
        buf.push(ch);
        // Byte length check: avoids emitting tiny fragments like "Dr."
        if TERMINATORS.contains(&ch) && buf.len() > 10 {
            out.push(buf.trim().to_string());
            buf.clear();
        }
    }
    let tail = buf.trim();
    if tail.len() > 5 {
        out.push(tail.to_string());
    }
    out
}
81
82// ─── Compute deterministic SHA-256 hash of all layer source files ────
83pub fn compute_layers_hash(config: &Config) -> [u8; 32] {
84    let layers_dir = Path::new(&config.paths.layers_dir);
85    let layer_files = &config.memory_layers.layers;
86    let mut sorted_names: Vec<&String> = layer_files.iter().collect();
87    sorted_names.sort();
88    let mut hasher = Sha256::new();
89    for name in &sorted_names {
90        let path = layers_dir.join(format!("{}.txt", name));
91        if let Ok(contents) = fs::read(&path) {
92            hasher.update(&contents);
93        }
94    }
95    let result = hasher.finalize();
96    let mut hash = [0u8; 32];
97    hash.copy_from_slice(&result);
98    hash
99}
100
// ─── BUILD: layers/ → binary ─────────────────────────
/// Build the complete binary microscope from the raw `layers/` text files.
///
/// Pipeline: read each `<layer>.txt`, construct the 9-depth block hierarchy
/// (identity → layers → clusters → items → sentences → tokens → syllables →
/// chars → bytes), sort blocks by depth, then write `microscope.bin`
/// (fixed-size headers), `data.bin` (payloads), `merkle.bin` (SHA-256 tree
/// over payloads) and `meta.bin` (MSC3 metadata: depth ranges + merkle root
/// + layers hash). Afterwards runs optional post-steps: zstd compression
/// (feature-gated), an embedding index, Hebbian drift integration, and a
/// structural fingerprint link table.
///
/// When `force` is false and the stored layers hash in `meta.bin` matches
/// the current one, the rebuild is skipped entirely.
///
/// Returns `Err(String)` with a human-readable message on any I/O failure.
pub fn build(config: &Config, force: bool) -> Result<(), String> {
    let layers_hash = compute_layers_hash(config);

    // Incremental build check — skip if layers unchanged
    if !force {
        let output_dir = Path::new(&config.paths.output_dir);
        let meta_path = output_dir.join("meta.bin");
        if let Ok(meta) = fs::read(&meta_path) {
            // meta.bin layout (see write-out at the bottom of this fn):
            // 16-byte fixed header + 9×8-byte depth entries (=88) +
            // 32-byte merkle root (=120) + 32-byte layers hash (=152),
            // hence the [120..152] slice for the stored layers hash.
            if meta.len() >= 152 && &meta[0..4] == b"MSC3" {
                let stored_hash = &meta[120..152];
                if stored_hash == &layers_hash[..] {
                    println!("{}", "Layers unchanged — skipping rebuild".green().bold());
                    return Ok(());
                }
            }
        }
    }

    println!(
        "{}",
        "Building microscope from raw layers (zero JSON)..."
            .cyan()
            .bold()
    );

    let layers_dir = Path::new(&config.paths.layers_dir);
    let output_dir = Path::new(&config.paths.output_dir);

    if !output_dir.exists() {
        fs::create_dir_all(output_dir).map_err(|e| format!("create output dir: {}", e))?;
    }

    let layer_files = &config.memory_layers.layers;

    // Collect all raw texts per layer
    let mut layer_texts: Vec<(String, Vec<String>)> = Vec::new();
    for name in layer_files {
        let path = layers_dir.join(format!("{}.txt", name));
        let texts = extract_texts_from_file(&path);
        println!("  {} {}: {} items", ">".green(), name, texts.len());
        layer_texts.push((name.clone(), texts));
    }

    let mut blocks: Vec<RawBlock> = Vec::new();

    // ═══ DEPTH 0: Identity ═══
    // Single root block; all depth-1 layer summaries point at index 0.
    let identity = "Microscope Memory: 9-depth hierarchical cognitive engine. Binary mmap, sub-microsecond spatial search, Hebbian learning, Merkle integrity.";
    blocks.push(RawBlock {
        data: to_block(identity),
        depth: 0,
        x: 0.25,
        y: 0.25,
        z: 0.25,
        layer_id: 0,
        parent_idx: u32::MAX, // root has no parent
        child_count: layer_files.len() as u16,
    });

    // ═══ DEPTH 1: Layer summaries ═══
    let sw = config.search.semantic_weight;
    let depth1_start = blocks.len();
    for (name, texts) in &layer_texts {
        let preview: Vec<String> = texts.iter().take(3).map(|s| safe_truncate(s, 40)).collect();
        let summary = format!("[{}] {} elem. {}", name, texts.len(), preview.join(" | "));
        let (x, y, z) = content_coords_blended(name, name, sw);
        blocks.push(RawBlock {
            data: to_block(&summary),
            depth: 1,
            x,
            y,
            z,
            layer_id: layer_to_id(name),
            parent_idx: 0,
            child_count: texts.len().div_ceil(5) as u16, // cluster count
        });
    }

    // ═══ DEPTH 2: Clusters (5 items each) ═══
    let _depth2_start = blocks.len();
    let mut depth2_layer_offsets: Vec<(usize, usize)> = Vec::new(); // (start_in_blocks, count)
    for (li, (name, texts)) in layer_texts.iter().enumerate() {
        let cluster_start = blocks.len();
        for ci in (0..texts.len()).step_by(5) {
            // Clamp the slice end so the final (possibly short) cluster is safe.
            let chunk: Vec<String> = texts[ci..texts.len().min(ci + 5)]
                .iter()
                .map(|s| safe_truncate(s, 40))
                .collect();
            let summary = format!("[{} #{}] {}", name, ci / 5, chunk.join(" | "));
            let (x, y, z) = content_coords_blended(&summary, name, sw);
            blocks.push(RawBlock {
                data: to_block(&summary),
                depth: 2,
                x,
                y,
                z,
                layer_id: layer_to_id(name),
                parent_idx: (depth1_start + li) as u32,
                child_count: chunk.len() as u16,
            });
        }
        depth2_layer_offsets.push((cluster_start, blocks.len() - cluster_start));
    }

    // ═══ DEPTH 3: Individual items ═══
    let depth3_start = blocks.len();
    let mut depth3_positions: Vec<(f32, f32, f32)> = Vec::new();
    for (li, (name, texts)) in layer_texts.iter().enumerate() {
        for (ti, text) in texts.iter().enumerate() {
            let (x, y, z) = content_coords_blended(text, name, sw);
            let cluster_idx = ti / 5; // matches the step_by(5) grouping above
            let (d2_start, d2_count) = depth2_layer_offsets[li];
            let parent = if cluster_idx < d2_count {
                (d2_start + cluster_idx) as u32
            } else {
                u32::MAX
            };

            blocks.push(RawBlock {
                data: to_block(text),
                depth: 3,
                x,
                y,
                z,
                layer_id: layer_to_id(name),
                parent_idx: parent,
                child_count: 0, // will update
            });
            depth3_positions.push((x, y, z));
        }
    }

    // ═══ DEPTH 4: Sentences ═══
    // Each child sits near its parent, jittered by a deterministic hash of
    // its own text so rebuilds produce identical coordinates.
    let _depth4_start = blocks.len();
    let mut depth4_parents: Vec<usize> = Vec::new();

    let d4_results: Vec<Vec<RawBlock>> = (depth3_start..(depth3_start + depth3_positions.len()))
        .into_par_iter()
        .map(|d3i| {
            let text = std::str::from_utf8(&blocks[d3i].data).unwrap_or("");
            let sentences = split_sentences(text);
            let mut local_blocks = Vec::new();
            for sent in &sentences {
                if sent.len() < 10 {
                    continue;
                }
                let (px, py, pz) = depth3_positions[d3i - depth3_start];
                // Simple multiplicative string hash (base 31) for the jitter seed.
                let h = sent
                    .as_bytes()
                    .iter()
                    .fold(0u64, |a, &b| a.wrapping_mul(31).wrapping_add(b as u64));
                // Offsets in roughly ±0.005 — one order tighter per depth below.
                let ox = ((h & 0xFF) as f32 - 128.0) / 25500.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 25500.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 25500.0;

                local_blocks.push(RawBlock {
                    data: to_block(sent),
                    depth: 4,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: blocks[d3i].layer_id,
                    parent_idx: d3i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    // Sequential append keeps parent_idx values (indices into `blocks`) valid.
    for (i, local) in d4_results.into_iter().enumerate() {
        let d3i = depth3_start + i;
        blocks[d3i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth4_parents.push(blocks.len() - 1);
        }
    }

    // ═══ DEPTH 5: Tokens (words) ═══
    let mut depth5_parents: Vec<usize> = Vec::new();
    let depth4_parents_clone = depth4_parents.clone();
    let d5_results: Vec<Vec<RawBlock>> = depth4_parents
        .into_par_iter()
        .map(|d4i| {
            let text_owned = String::from_utf8_lossy(&blocks[d4i].data).to_string();
            let px = blocks[d4i].x;
            let py = blocks[d4i].y;
            let pz = blocks[d4i].z;
            let lid = blocks[d4i].layer_id;

            // At most 8 tokens per sentence to bound fan-out.
            let tokens: Vec<String> = text_owned
                .split_whitespace()
                .take(8)
                .map(|s| s.to_string())
                .collect();
            let mut local_blocks = Vec::new();
            for tok in &tokens {
                if tok.len() < 2 {
                    continue;
                }
                let h = tok
                    .as_bytes()
                    .iter()
                    .fold(0u64, |a, &b| a.wrapping_mul(31).wrapping_add(b as u64));
                let ox = ((h & 0xFF) as f32 - 128.0) / 255000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 255000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 255000.0;

                local_blocks.push(RawBlock {
                    data: to_block(tok),
                    depth: 5,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d4i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d5_results.into_iter().enumerate() {
        let d4i = depth4_parents_clone[i];
        blocks[d4i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth5_parents.push(blocks.len() - 1);
        }
    }

    // ═══ DEPTH 6: Syllables / morphemes (sub-word) ═══
    let mut depth6_parents: Vec<usize> = Vec::new();
    let d6_results: Vec<Vec<RawBlock>> = depth5_parents
        .clone()
        .into_par_iter()
        .map(|d5i| {
            let text_owned = String::from_utf8_lossy(&blocks[d5i].data).to_string();
            let px = blocks[d5i].x;
            let py = blocks[d5i].y;
            let pz = blocks[d5i].z;
            let lid = blocks[d5i].layer_id;

            let chars: Vec<char> = text_owned.chars().collect();
            if chars.len() < 3 {
                return vec![];
            }
            // Aim for ~3 chunks per token, with chunk size clamped to [3, 5].
            let chunk_size = 3.max(chars.len() / 3).min(5);
            let mut local_blocks = Vec::new();
            for chunk in chars.chunks(chunk_size) {
                let syl: String = chunk.iter().collect();
                if syl.trim().is_empty() {
                    continue;
                }
                let h = syl
                    .as_bytes()
                    .iter()
                    .fold(0u64, |a, &b| a.wrapping_mul(37).wrapping_add(b as u64));
                let ox = ((h & 0xFF) as f32 - 128.0) / 2550000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 2550000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 2550000.0;

                local_blocks.push(RawBlock {
                    data: to_block(&syl),
                    depth: 6,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d5i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d6_results.into_iter().enumerate() {
        let d5i = depth5_parents[i];
        blocks[d5i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth6_parents.push(blocks.len() - 1);
        }
    }

    // ═══ DEPTH 7: Characters ═══
    let mut depth7_parents: Vec<usize> = Vec::new();
    let d7_results: Vec<Vec<RawBlock>> = depth6_parents
        .clone()
        .into_par_iter()
        .map(|d6i| {
            let text_owned = String::from_utf8_lossy(&blocks[d6i].data).to_string();
            let px = blocks[d6i].x;
            let py = blocks[d6i].y;
            let pz = blocks[d6i].z;
            let lid = blocks[d6i].layer_id;

            let mut local_blocks = Vec::new();
            for ch in text_owned.chars() {
                if ch.is_whitespace() {
                    continue;
                }
                // Odd-constant multiply spreads single code points across u64.
                let h = (ch as u64).wrapping_mul(0x517cc1b727220a95);
                let ox = ((h & 0xFF) as f32 - 128.0) / 25500000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 25500000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 25500000.0;

                let ch_str = ch.to_string();
                local_blocks.push(RawBlock {
                    data: to_block(&ch_str),
                    depth: 7,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d6i as u32,
                    child_count: 0,
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d7_results.into_iter().enumerate() {
        let d6i = depth6_parents[i];
        blocks[d6i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
            depth7_parents.push(blocks.len() - 1);
        }
    }

    // ═══ DEPTH 8: Raw bytes — the atomic level. Below this, data corrupts. ═══
    let d8_results: Vec<Vec<RawBlock>> = depth7_parents
        .clone()
        .into_par_iter()
        .map(|d7i| {
            let text_owned = String::from_utf8_lossy(&blocks[d7i].data).to_string();
            let px = blocks[d7i].x;
            let py = blocks[d7i].y;
            let pz = blocks[d7i].z;
            let lid = blocks[d7i].layer_id;

            let bytes = text_owned.as_bytes();
            let mut local_blocks = Vec::new();
            for &byte in bytes {
                let hex = format!("0x{:02X}", byte);
                // Golden-ratio constant (Fibonacci hashing) for byte jitter.
                let h = (byte as u64).wrapping_mul(0x9E3779B97F4A7C15);
                let ox = ((h & 0xFF) as f32 - 128.0) / 255000000.0;
                let oy = (((h >> 8) & 0xFF) as f32 - 128.0) / 255000000.0;
                let oz = (((h >> 16) & 0xFF) as f32 - 128.0) / 255000000.0;

                local_blocks.push(RawBlock {
                    data: to_block(&hex),
                    depth: 8,
                    x: px + ox,
                    y: py + oy,
                    z: pz + oz,
                    layer_id: lid,
                    parent_idx: d7i as u32,
                    child_count: 0, // LEAF. Below = corruption.
                });
            }
            local_blocks
        })
        .collect();

    for (i, local) in d8_results.into_iter().enumerate() {
        let d7i = depth7_parents[i];
        blocks[d7i].child_count = local.len() as u16;
        for b in local {
            blocks.push(b);
        }
    }

    let n = blocks.len();
    println!("\n  {} blocks total", n);

    // Sort by depth (stable sort: preserves build order within each depth,
    // which the contiguous depth-range bookkeeping below relies on)
    let mut indices: Vec<usize> = (0..n).collect();
    indices.sort_by_key(|&i| blocks[i].depth);

    // Remap parent indices after sort
    let mut old_to_new = vec![0u32; n];
    for (new_i, &old_i) in indices.iter().enumerate() {
        old_to_new[old_i] = new_i as u32;
    }

    // Write binary files
    let output_dir = Path::new(&config.paths.output_dir);
    fs::create_dir_all(output_dir).ok();

    let hdr_path = output_dir.join("microscope.bin");
    let dat_path = output_dir.join("data.bin");
    let meta_path = output_dir.join("meta.bin");

    let mut hdr_file = BufWriter::new(
        fs::File::create(&hdr_path).map_err(|e| format!("create microscope.bin: {}", e))?,
    );
    let mut dat_file =
        BufWriter::new(fs::File::create(&dat_path).map_err(|e| format!("create data.bin: {}", e))?);

    let mut depth_ranges: Vec<(u32, u32)> = vec![(0, 0); 9];
    let mut cur_depth: u8 = 0;
    let mut range_start: u32 = 0;

    for (new_i, &old_i) in indices.iter().enumerate() {
        let b = &blocks[old_i];
        let offset = dat_file
            .stream_position()
            .map_err(|e| format!("data.bin stream_position: {}", e))? as u32;
        let len = b.data.len().min(BLOCK_DATA_SIZE) as u16;
        dat_file
            .write_all(&b.data[..len as usize])
            .map_err(|e| format!("write data.bin: {}", e))?;

        // Translate the build-order parent index into post-sort order.
        let parent = if b.parent_idx == u32::MAX {
            u32::MAX
        } else {
            old_to_new[b.parent_idx as usize]
        };

        let crc = crc16_ccitt(&b.data[..len as usize]);
        let hdr = BlockHeader {
            x: b.x,
            y: b.y,
            z: b.z,
            zoom: b.depth as f32 / 8.0, // normalized zoom level in [0, 1]
            depth: b.depth,
            layer_id: b.layer_id,
            data_offset: offset,
            data_len: len,
            parent_idx: parent,
            child_count: b.child_count,
            crc16: crc.to_le_bytes(),
        };

        // SAFETY: reinterprets the header struct as HEADER_SIZE raw bytes for
        // serialization — assumes BlockHeader's declared layout (see reader
        // module) matches HEADER_SIZE exactly, with no uninitialized padding.
        let bytes: &[u8] = unsafe {
            std::slice::from_raw_parts(&hdr as *const BlockHeader as *const u8, HEADER_SIZE)
        };
        hdr_file
            .write_all(bytes)
            .map_err(|e| format!("write microscope.bin: {}", e))?;

        // Track depth ranges
        if b.depth != cur_depth {
            depth_ranges[cur_depth as usize] = (range_start, new_i as u32 - range_start);
            range_start = new_i as u32;
            cur_depth = b.depth;
        }
    }
    // Close out the final (deepest) range, which the loop never flushes.
    depth_ranges[cur_depth as usize] = (range_start, n as u32 - range_start);
    hdr_file
        .flush()
        .map_err(|e| format!("flush microscope.bin: {}", e))?;
    dat_file
        .flush()
        .map_err(|e| format!("flush data.bin: {}", e))?;

    // ═══ Optional zstd compression of data.bin ═══
    #[cfg(feature = "compression")]
    if config.performance.compression {
        let raw_data =
            fs::read(&dat_path).map_err(|e| format!("read data.bin for compression: {}", e))?;
        let raw_size = raw_data.len();
        let compressed = zstd::encode_all(std::io::Cursor::new(&raw_data), 3)
            .map_err(|e| format!("zstd compress: {}", e))?;
        let comp_size = compressed.len();
        let zst_path = output_dir.join("data.bin.zst");
        fs::write(&zst_path, &compressed).map_err(|e| format!("write data.bin.zst: {}", e))?;
        let ratio = if comp_size > 0 {
            raw_size as f64 / comp_size as f64
        } else {
            0.0
        };
        println!(
            "  {}: {} → {} bytes ({:.1}x ratio)",
            "zstd".green(),
            raw_size,
            comp_size,
            ratio,
        );
    }

    // ═══ Merkle tree: SHA-256 over all block data ═══
    let merkle_path = output_dir.join("merkle.bin");
    // Re-read data.bin to get all block data slices for Merkle leaves
    hdr_file
        .flush()
        .map_err(|e| format!("flush microscope.bin: {}", e))?;
    dat_file
        .flush()
        .map_err(|e| format!("flush data.bin: {}", e))?;

    let dat_bytes = fs::read(&dat_path).map_err(|e| format!("read data.bin for merkle: {}", e))?;
    let hdr_bytes =
        fs::read(&hdr_path).map_err(|e| format!("read microscope.bin for merkle: {}", e))?;
    let mut leaf_slices: Vec<&[u8]> = Vec::with_capacity(n);
    for i in 0..n {
        let hdr_off = i * HEADER_SIZE;
        // Byte offsets into the serialized header: x,y,z,zoom (16) +
        // depth,layer_id (2) put data_offset at 18..22 and data_len at 22..24.
        // NOTE(review): assumes BlockHeader is packed with no padding — matches
        // the raw-parts write above; confirm against the reader's definition.
        let data_offset =
            u32::from_le_bytes(hdr_bytes[hdr_off + 18..hdr_off + 22].try_into().unwrap()) as usize;
        let data_len =
            u16::from_le_bytes(hdr_bytes[hdr_off + 22..hdr_off + 24].try_into().unwrap()) as usize;
        if data_offset + data_len <= dat_bytes.len() {
            leaf_slices.push(&dat_bytes[data_offset..data_offset + data_len]);
        } else {
            // Out-of-range header → empty leaf rather than a panic.
            leaf_slices.push(&[]);
        }
    }

    let merkle_tree = merkle::MerkleTree::build(&leaf_slices);
    fs::write(&merkle_path, merkle_tree.to_bytes())
        .map_err(|e| format!("write merkle.bin: {}", e))?;
    println!(
        "  {}: {} leaves, root={}",
        "merkle".green(),
        merkle_tree.leaf_count,
        hex_str(&merkle_tree.root)
    );

    // meta.bin — MSC3 format with merkle root + layers hash
    let mut meta_buf = Vec::with_capacity(META_HEADER_SIZE + 9 * DEPTH_ENTRY_SIZE + 32 + 32);
    meta_buf.extend_from_slice(b"MSC3"); // magic v3
    meta_buf.extend_from_slice(&3u32.to_le_bytes()); // version
    meta_buf.extend_from_slice(&(n as u32).to_le_bytes()); // block_count
    meta_buf.extend_from_slice(&9u32.to_le_bytes()); // depth_count
    for &(start, count) in &depth_ranges {
        meta_buf.extend_from_slice(&start.to_le_bytes());
        meta_buf.extend_from_slice(&count.to_le_bytes());
    }
    meta_buf.extend_from_slice(&merkle_tree.root); // 32 bytes merkle root
    meta_buf.extend_from_slice(&layers_hash); // 32 bytes layers content hash
    fs::write(meta_path, &meta_buf).map_err(|e| format!("write meta.bin: {}", e))?;

    // Report
    let hdr_size = n * HEADER_SIZE;
    let dat_size = dat_file.stream_position().unwrap_or(0) as usize; // Get final data size
    let meta_size = meta_buf.len();
    println!(
        "\n  {}: {} bytes ({:.1} KB)",
        "headers".green(),
        hdr_size,
        hdr_size as f64 / 1024.0
    );
    println!(
        "  {}:    {} bytes ({:.1} KB)",
        "data".green(),
        dat_size,
        dat_size as f64 / 1024.0
    );
    println!("  {}:    {} bytes", "meta".green(), meta_size);
    println!(
        "  {}:   {:.1} KB",
        "TOTAL".yellow().bold(),
        (hdr_size + dat_size + meta_size) as f64 / 1024.0
    );

    // Which CPU cache level the header array fits into (typical sizes).
    let fits = if hdr_size < 32768 {
        "L1d (32KB)"
    } else if hdr_size < 262144 {
        "L2 (256KB)"
    } else {
        "L3"
    };
    println!("  cache:   {}", fits.green().bold());

    for (d, &(_start, count)) in depth_ranges.iter().enumerate() {
        println!("  Depth {}: {:>5} blocks", d, count);
    }

    // ═══ Embedding index (mock provider, or candle if enabled) ═══
    if config.embedding.provider != "none" {
        println!("\n  Building embedding index...");
        let emb_path = output_dir.join("embeddings.bin");
        let reader = MicroscopeReader::open(config)?;
        let max_depth = config.embedding.max_depth;

        // With the "embeddings" feature: try candle, fall back to mock on
        // init failure. Without it: always mock.
        #[cfg(feature = "embeddings")]
        let provider: Box<dyn crate::embeddings::EmbeddingProvider> =
            if config.embedding.provider == "candle" {
                match crate::embeddings::CandleEmbeddingProvider::new(&config.embedding.model) {
                    Ok(p) => Box::new(p),
                    Err(e) => {
                        eprintln!(
                            "  {} Candle init failed: {:?}, using mock",
                            "WARN".yellow(),
                            e
                        );
                        Box::new(crate::embeddings::MockEmbeddingProvider::new(
                            config.embedding.dim,
                        ))
                    }
                }
            } else {
                Box::new(crate::embeddings::MockEmbeddingProvider::new(
                    config.embedding.dim,
                ))
            };

        #[cfg(not(feature = "embeddings"))]
        let provider: Box<dyn crate::embeddings::EmbeddingProvider> = Box::new(
            crate::embeddings::MockEmbeddingProvider::new(config.embedding.dim),
        );

        match crate::embedding_index::build_embedding_index(
            &*provider, &reader, max_depth, &emb_path,
        ) {
            Ok(()) => println!("  {} embeddings.bin built", "OK".green()),
            Err(e) => eprintln!("  {} embedding build: {}", "ERR".red(), e),
        }
    }

    // ═══ Hebbian delta integration ═══
    let hebb_path = output_dir.join("activations.bin");
    if hebb_path.exists() {
        let hebb = crate::hebbian::HebbianState::load_or_init(output_dir, n);
        // Count records whose drift exceeds the integration threshold.
        let drifted = hebb
            .activations
            .iter()
            .filter(|r| {
                r.drift_x.abs() > 0.001 || r.drift_y.abs() > 0.001 || r.drift_z.abs() > 0.001
            })
            .count();

        if drifted > 0 {
            apply_hebbian_deltas(output_dir, &hebb, n)?;
            println!(
                "  {} Hebbian deltas applied to {} blocks",
                "HEBBIAN".magenta(),
                drifted
            );
        }
    }

    // ═══ Structural fingerprinting ═══
    {
        let reader = MicroscopeReader::open(config)?;
        let texts: Vec<&str> = (0..reader.block_count).map(|i| reader.text(i)).collect();
        let table = crate::fingerprint::LinkTable::build(&texts);
        table.save(output_dir)?;
        let stats = table.stats();
        println!(
            "  {} {} links across {} blocks",
            "FINGERPRINT".cyan(),
            stats.link_count,
            stats.block_count
        );
    }

    println!("\n{}", "ZERO JSON. Pure binary. Done.".green().bold());
    Ok(())
}
757
758/// Post-process: apply Hebbian drift deltas to microscope.bin header coordinates.
759fn apply_hebbian_deltas(
760    output_dir: &Path,
761    hebb: &crate::hebbian::HebbianState,
762    block_count: usize,
763) -> Result<(), String> {
764    let hdr_path = output_dir.join("microscope.bin");
765    let mut data = fs::read(&hdr_path).map_err(|e| format!("read microscope.bin: {}", e))?;
766
767    for i in 0..block_count.min(hebb.activations.len()) {
768        let rec = &hebb.activations[i];
769        if rec.drift_x.abs() < 0.001 && rec.drift_y.abs() < 0.001 && rec.drift_z.abs() < 0.001 {
770            continue;
771        }
772
773        let off = i * HEADER_SIZE;
774        if off + 12 > data.len() {
775            break;
776        }
777
778        // Read current x, y, z (first 12 bytes of header, 3×f32 LE)
779        let x = f32::from_le_bytes(data[off..off + 4].try_into().unwrap());
780        let y = f32::from_le_bytes(data[off + 4..off + 8].try_into().unwrap());
781        let z = f32::from_le_bytes(data[off + 8..off + 12].try_into().unwrap());
782
783        // Apply drift
784        let new_x = x + rec.drift_x;
785        let new_y = y + rec.drift_y;
786        let new_z = z + rec.drift_z;
787
788        data[off..off + 4].copy_from_slice(&new_x.to_le_bytes());
789        data[off + 4..off + 8].copy_from_slice(&new_y.to_le_bytes());
790        data[off + 8..off + 12].copy_from_slice(&new_z.to_le_bytes());
791    }
792
793    fs::write(&hdr_path, &data).map_err(|e| format!("write microscope.bin: {}", e))?;
794
795    // Clear drift values after integration (they're now baked in)
796    let mut hebb_clone = crate::hebbian::HebbianState {
797        activations: hebb.activations.clone(),
798        coactivations: hebb.coactivations.clone(),
799        fingerprints: hebb.fingerprints.clone(),
800    };
801    for rec in &mut hebb_clone.activations {
802        rec.drift_x = 0.0;
803        rec.drift_y = 0.0;
804        rec.drift_z = 0.0;
805    }
806    hebb_clone
807        .save(output_dir)
808        .map_err(|e| format!("save cleared Hebbian: {}", e))
809}