// microscope_memory/archetype.rs

//! Archetype emergence layer for Microscope Memory.
//!
//! Archetypes are recurring activation patterns that crystallize from
//! repeated Hebbian/mirror/resonance activity. When the same spatial
//! regions fire together often enough, an archetype "emerges" —
//! a stable attractor in the memory landscape.
//!
//! Archetypes are visible at depth 0 (D0) — the highest zoom level —
//! as named clusters that represent concepts the system has learned.
//!
//! Binary format: `archetypes.bin` (magic `ARC1`).
12
13use std::collections::HashMap;
14use std::fs;
15use std::path::Path;
16
17use crate::hebbian;
18use crate::resonance::ResonanceState;
19
// ─── Constants ──────────────────────────────────────

/// Minimum resonance field energy to seed an archetype.
/// A field cell qualifies when its value is `>=` this threshold.
const SEED_THRESHOLD: f32 = 2.0;
/// Minimum number of member blocks for a valid archetype.
/// Candidate clusters with fewer members are discarded.
const MIN_MEMBERS: usize = 3;
/// Maximum number of archetypes.
const MAX_ARCHETYPES: usize = 100;
/// Spatial radius for clustering blocks into an archetype.
/// Also used to decide whether an existing archetype already covers a hot cell.
const CLUSTER_RADIUS: f32 = 0.15;
/// Archetype strength decay per cycle (multiplicative factor applied by `decay`).
const ARCHETYPE_DECAY: f32 = 0.98;
32
// ─── Types ──────────────────────────────────────────

/// An archetype — an emergent concept crystallized from activation patterns.
///
/// Instances are created by `ArchetypeState::detect` and persisted to
/// `archetypes.bin` by `save_archetypes`.
#[derive(Clone, Debug)]
pub struct Archetype {
    /// Unique archetype ID (taken from `ArchetypeState::next_id` at creation).
    pub id: u32,
    /// Centroid in 3D space (average of member block coordinates).
    pub centroid: (f32, f32, f32),
    /// Member block indices (positions in the `headers` slice given to `detect`).
    pub members: Vec<u32>,
    /// Archetype strength (accumulated from resonance + Hebbian energy).
    /// Starts at the seeding cell's field energy and is multiplied by
    /// `ARCHETYPE_DECAY` on every `decay` call.
    pub strength: f32,
    /// Number of times this archetype has been reinforced (starts at 1).
    pub reinforcement_count: u32,
    /// Timestamp of first emergence, as returned by
    /// `hebbian::now_epoch_ms_pub()` (presumably Unix epoch milliseconds — confirm).
    pub emerged_ms: u64,
    /// Timestamp of last reinforcement (same clock as `emerged_ms`).
    pub last_reinforced_ms: u64,
    /// Auto-generated label (derived from member block content).
    pub label: String,
}
55
56/// Archetype system state.
57pub struct ArchetypeState {
58    pub archetypes: Vec<Archetype>,
59    next_id: u32,
60}
61
62impl ArchetypeState {
63    /// Load or initialize archetype state.
64    pub fn load_or_init(output_dir: &Path) -> Self {
65        load_archetypes(output_dir).unwrap_or_else(|| Self {
66            archetypes: Vec::new(),
67            next_id: 1,
68        })
69    }
70
71    /// Detect new archetypes from the resonance field and Hebbian state.
72    /// Returns the number of new archetypes emerged.
73    pub fn detect(
74        &mut self,
75        resonance: &ResonanceState,
76        hebb: &hebbian::HebbianState,
77        headers: &[(f32, f32, f32)],
78        block_texts: &[&str],
79    ) -> usize {
80        let now_ms = hebbian::now_epoch_ms_pub();
81        let mut new_count = 0;
82
83        // Find hot spots in the resonance field above threshold
84        let hot_cells: Vec<((i16, i16, i16), f32)> = resonance
85            .field
86            .iter()
87            .filter(|(_, &v)| v >= SEED_THRESHOLD)
88            .map(|(&k, &v)| (k, v))
89            .collect();
90
91        for ((qx, qy, qz), field_energy) in hot_cells {
92            // Convert quantized coords back to float
93            let cx = qx as f32 / 20.0;
94            let cy = qy as f32 / 20.0;
95            let cz = qz as f32 / 20.0;
96
97            // Check if an existing archetype already covers this region
98            if self
99                .archetypes
100                .iter()
101                .any(|a| spatial_dist(a.centroid, (cx, cy, cz)) < CLUSTER_RADIUS)
102            {
103                // Reinforce existing archetype instead
104                if let Some(a) = self.archetypes.iter_mut().min_by(|a, b| {
105                    spatial_dist(a.centroid, (cx, cy, cz))
106                        .partial_cmp(&spatial_dist(b.centroid, (cx, cy, cz)))
107                        .unwrap()
108                }) {
109                    a.strength += field_energy * 0.1;
110                    a.reinforcement_count += 1;
111                    a.last_reinforced_ms = now_ms;
112                }
113                continue;
114            }
115
116            // Find nearby blocks to form the archetype
117            let mut members = Vec::new();
118            for (idx, (bx, by, bz)) in headers.iter().enumerate() {
119                let dx = cx - bx;
120                let dy = cy - by;
121                let dz = cz - bz;
122                let dist = (dx * dx + dy * dy + dz * dz).sqrt();
123
124                if dist < CLUSTER_RADIUS {
125                    // Extra requirement: block must have some Hebbian activity
126                    if idx < hebb.activations.len() && hebb.activations[idx].activation_count > 0 {
127                        members.push(idx as u32);
128                    }
129                }
130            }
131
132            if members.len() < MIN_MEMBERS {
133                continue;
134            }
135
136            // Compute centroid from actual member positions
137            let (sum_x, sum_y, sum_z) = members.iter().fold((0.0f32, 0.0f32, 0.0f32), |acc, &m| {
138                let (x, y, z) = headers[m as usize];
139                (acc.0 + x, acc.1 + y, acc.2 + z)
140            });
141            let n = members.len() as f32;
142            let centroid = (sum_x / n, sum_y / n, sum_z / n);
143
144            // Generate label from most common words in member blocks
145            let label = generate_label(&members, block_texts);
146
147            let archetype = Archetype {
148                id: self.next_id,
149                centroid,
150                members,
151                strength: field_energy,
152                reinforcement_count: 1,
153                emerged_ms: now_ms,
154                last_reinforced_ms: now_ms,
155                label,
156            };
157
158            self.archetypes.push(archetype);
159            self.next_id += 1;
160            new_count += 1;
161
162            if self.archetypes.len() >= MAX_ARCHETYPES {
163                break;
164            }
165        }
166
167        new_count
168    }
169
170    /// Reinforce archetypes based on a new activation pattern.
171    /// If activated blocks overlap with an archetype's members, strengthen it.
172    pub fn reinforce(&mut self, activated_blocks: &[(u32, f32)]) {
173        let now_ms = hebbian::now_epoch_ms_pub();
174        let activated_set: HashMap<u32, f32> = activated_blocks.iter().copied().collect();
175
176        for archetype in &mut self.archetypes {
177            let overlap: f32 = archetype
178                .members
179                .iter()
180                .filter_map(|m| activated_set.get(m))
181                .sum();
182
183            if overlap > 0.0 {
184                archetype.strength += overlap * 0.05;
185                archetype.reinforcement_count += 1;
186                archetype.last_reinforced_ms = now_ms;
187            }
188        }
189    }
190
191    /// Decay archetype strengths. Remove dead archetypes.
192    pub fn decay(&mut self) {
193        for a in &mut self.archetypes {
194            a.strength *= ARCHETYPE_DECAY;
195        }
196        // Remove archetypes that have decayed below threshold and have few reinforcements
197        self.archetypes
198            .retain(|a| a.strength > 0.1 || a.reinforcement_count > 5);
199    }
200
201    /// Find which archetype (if any) a query activation best matches.
202    pub fn match_archetype(&self, activated_blocks: &[(u32, f32)]) -> Option<(usize, f32)> {
203        let activated_set: HashMap<u32, f32> = activated_blocks.iter().copied().collect();
204
205        let mut best: Option<(usize, f32)> = None;
206        for (i, archetype) in self.archetypes.iter().enumerate() {
207            let overlap: f32 = archetype
208                .members
209                .iter()
210                .filter_map(|m| activated_set.get(m))
211                .sum();
212
213            let coverage = if archetype.members.is_empty() {
214                0.0
215            } else {
216                let matching = archetype
217                    .members
218                    .iter()
219                    .filter(|m| activated_set.contains_key(m))
220                    .count();
221                matching as f32 / archetype.members.len() as f32
222            };
223
224            let score = overlap * coverage * archetype.strength;
225            if score > 0.0 && (best.is_none() || score > best.unwrap().1) {
226                best = Some((i, score));
227            }
228        }
229        best
230    }
231
232    /// Get statistics.
233    pub fn stats(&self) -> ArchetypeStats {
234        let total_members: usize = self.archetypes.iter().map(|a| a.members.len()).sum();
235        let strongest = self
236            .archetypes
237            .iter()
238            .max_by(|a, b| a.strength.partial_cmp(&b.strength).unwrap());
239
240        ArchetypeStats {
241            archetype_count: self.archetypes.len(),
242            total_members,
243            strongest_label: strongest.map(|a| a.label.clone()),
244            strongest_strength: strongest.map(|a| a.strength),
245        }
246    }
247
248    /// Save to disk.
249    pub fn save(&self, output_dir: &Path) -> Result<(), String> {
250        save_archetypes(output_dir, self)
251    }
252}
253
/// Summary figures produced by `ArchetypeState::stats`.
#[derive(Clone, Debug, PartialEq)]
pub struct ArchetypeStats {
    /// Number of archetypes currently held.
    pub archetype_count: usize,
    /// Sum of the member counts of all archetypes.
    pub total_members: usize,
    /// Label of the strongest archetype, or `None` when there is none.
    pub strongest_label: Option<String>,
    /// Strength of the strongest archetype, or `None` when there is none.
    pub strongest_strength: Option<f32>,
}
260
// ─── Helpers ────────────────────────────────────────

/// Euclidean distance between two points in 3D space.
fn spatial_dist(a: (f32, f32, f32), b: (f32, f32, f32)) -> f32 {
    let (ax, ay, az) = a;
    let (bx, by, bz) = b;
    let delta = [ax - bx, ay - by, az - bz];
    let squared = delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
    squared.sqrt()
}
269
/// Generate a label from the most common meaningful words in member blocks.
///
/// Each member index selects a text from `block_texts`; indices past the end
/// are ignored. Words are split on whitespace and stripped of leading and
/// trailing non-alphanumeric characters. Words of two bytes or fewer and
/// stopwords (compared case-insensitively) are dropped. Counting itself is
/// case-sensitive.
///
/// Returns up to three words joined with `-`, most frequent first. Words with
/// equal counts are ordered lexicographically so the label is the same on
/// every run. Returns an empty string when no word qualifies.
fn generate_label(members: &[u32], block_texts: &[&str]) -> String {
    const STOPWORDS: &[&str] = &[
        "a", "the", "is", "of", "and", "to", "in", "it", "on", "for", "that", "this", "with",
        "was", "are", "be", "has", "had", "not", "but", "from", "or", "an", "at", "by",
    ];
    const LABEL_WORDS: usize = 3;

    let candidates = members
        .iter()
        .filter_map(|&idx| block_texts.get(idx as usize).copied())
        .flat_map(|text| text.split_whitespace())
        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
        // All stopwords are ASCII, so an ASCII case-insensitive comparison
        // is enough and avoids allocating a lowercase copy of every word.
        .filter(|w| w.len() > 2 && !STOPWORDS.iter().any(|stop| stop.eq_ignore_ascii_case(w)));

    let mut word_counts: HashMap<&str, usize> = HashMap::new();
    for word in candidates {
        *word_counts.entry(word).or_insert(0) += 1;
    }

    let mut ranked: Vec<(&str, usize)> = word_counts.into_iter().collect();
    // Count descending, then word ascending: a total order, so the result
    // does not depend on HashMap iteration order.
    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

    ranked
        .iter()
        .take(LABEL_WORDS)
        .map(|&(word, _)| word)
        .collect::<Vec<&str>>()
        .join("-")
}
300
// ─── Binary I/O ─────────────────────────────────────
//
// archetypes.bin format (all integers and floats little-endian):
//   magic: b"ARC1" (4 bytes)
//   next_id: u32 (4 bytes)
//   count: u32 (4 bytes)
//   per archetype:
//     id: u32, cx/cy/cz: f32×3, strength: f32
//     reinforcement_count: u32, emerged_ms: u64, last_reinforced_ms: u64
//     member_count: u16, members: [member_count × u32]
//     label_len: u16, label_bytes: [label_len] (UTF-8)
312
313fn load_archetypes(output_dir: &Path) -> Option<ArchetypeState> {
314    let path = output_dir.join("archetypes.bin");
315    let data = fs::read(&path).ok()?;
316    if data.len() < 12 || &data[0..4] != b"ARC1" {
317        return None;
318    }
319
320    let next_id = u32::from_le_bytes(data[4..8].try_into().unwrap());
321    let count = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize;
322
323    let mut pos = 12;
324    let mut archetypes = Vec::with_capacity(count);
325
326    for _ in 0..count {
327        if pos + 40 > data.len() {
328            break;
329        }
330
331        let id = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
332        let cx = f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
333        let cy = f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap());
334        let cz = f32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap());
335        let strength = f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap());
336        let reinforcement_count = u32::from_le_bytes(data[pos + 20..pos + 24].try_into().unwrap());
337        let emerged_ms = u64::from_le_bytes(data[pos + 24..pos + 32].try_into().unwrap());
338        let last_reinforced_ms = u64::from_le_bytes(data[pos + 32..pos + 40].try_into().unwrap());
339        pos += 40;
340
341        if pos + 2 > data.len() {
342            break;
343        }
344        let member_count = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
345        pos += 2;
346
347        let mut members = Vec::with_capacity(member_count);
348        for _ in 0..member_count {
349            if pos + 4 > data.len() {
350                break;
351            }
352            members.push(u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()));
353            pos += 4;
354        }
355
356        if pos + 2 > data.len() {
357            break;
358        }
359        let label_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
360        pos += 2;
361
362        let label = if pos + label_len <= data.len() {
363            let s = String::from_utf8_lossy(&data[pos..pos + label_len]).to_string();
364            pos += label_len;
365            s
366        } else {
367            String::new()
368        };
369
370        archetypes.push(Archetype {
371            id,
372            centroid: (cx, cy, cz),
373            members,
374            strength,
375            reinforcement_count,
376            emerged_ms,
377            last_reinforced_ms,
378            label,
379        });
380    }
381
382    Some(ArchetypeState {
383        archetypes,
384        next_id,
385    })
386}
387
388fn save_archetypes(output_dir: &Path, state: &ArchetypeState) -> Result<(), String> {
389    let path = output_dir.join("archetypes.bin");
390    let mut buf = Vec::new();
391
392    buf.extend_from_slice(b"ARC1");
393    buf.extend_from_slice(&state.next_id.to_le_bytes());
394    buf.extend_from_slice(&(state.archetypes.len() as u32).to_le_bytes());
395
396    for a in &state.archetypes {
397        buf.extend_from_slice(&a.id.to_le_bytes());
398        buf.extend_from_slice(&a.centroid.0.to_le_bytes());
399        buf.extend_from_slice(&a.centroid.1.to_le_bytes());
400        buf.extend_from_slice(&a.centroid.2.to_le_bytes());
401        buf.extend_from_slice(&a.strength.to_le_bytes());
402        buf.extend_from_slice(&a.reinforcement_count.to_le_bytes());
403        buf.extend_from_slice(&a.emerged_ms.to_le_bytes());
404        buf.extend_from_slice(&a.last_reinforced_ms.to_le_bytes());
405
406        buf.extend_from_slice(&(a.members.len() as u16).to_le_bytes());
407        for &m in &a.members {
408            buf.extend_from_slice(&m.to_le_bytes());
409        }
410
411        let label_bytes = a.label.as_bytes();
412        buf.extend_from_slice(&(label_bytes.len() as u16).to_le_bytes());
413        buf.extend_from_slice(label_bytes);
414    }
415
416    fs::write(&path, &buf).map_err(|e| format!("write archetypes.bin: {}", e))
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an archetype fixture; `times` is `(emerged_ms, last_reinforced_ms)`.
    fn fixture(
        id: u32,
        label: &str,
        centroid: (f32, f32, f32),
        members: &[u32],
        strength: f32,
        reinforcement_count: u32,
        times: (u64, u64),
    ) -> Archetype {
        Archetype {
            id,
            centroid,
            members: members.to_vec(),
            strength,
            reinforcement_count,
            emerged_ms: times.0,
            last_reinforced_ms: times.1,
            label: label.to_string(),
        }
    }

    /// Wrap fixtures in a state whose `next_id` follows the last fixture.
    fn state_of(archetypes: Vec<Archetype>) -> ArchetypeState {
        let next_id = archetypes.len() as u32 + 1;
        ArchetypeState {
            archetypes,
            next_id,
        }
    }

    #[test]
    fn test_spatial_dist() {
        let unit = spatial_dist((0.0, 0.0, 0.0), (1.0, 0.0, 0.0));
        assert!((unit - 1.0).abs() < 0.001);

        let point = (0.1, 0.2, 0.3);
        assert!(spatial_dist(point, point) < 0.001);
    }

    #[test]
    fn test_generate_label() {
        let texts = [
            "hello world Rust",
            "Rust memory system",
            "Rust binary format",
        ];
        let label = generate_label(&[0, 1, 2], &texts);
        assert!(label.contains("Rust"));
    }

    #[test]
    fn test_reinforce() {
        let mut state = state_of(vec![fixture(
            1,
            "test",
            (0.1, 0.2, 0.3),
            &[0, 1, 2],
            1.0,
            1,
            (1000, 1000),
        )]);

        state.reinforce(&[(0, 0.9), (1, 0.5), (5, 0.3)]);

        let reinforced = &state.archetypes[0];
        assert!(reinforced.strength > 1.0); // reinforced
        assert_eq!(reinforced.reinforcement_count, 2);
    }

    #[test]
    fn test_match_archetype() {
        let state = state_of(vec![
            fixture(1, "alpha", (0.1, 0.2, 0.3), &[0, 1, 2], 2.0, 5, (1000, 2000)),
            fixture(2, "beta", (0.5, 0.5, 0.5), &[10, 11, 12], 1.0, 2, (1500, 1800)),
        ]);

        // Activate blocks that overlap with archetype "alpha"
        let hit = state.match_archetype(&[(0, 0.9), (1, 0.7), (2, 0.5)]);
        assert!(hit.is_some());
        assert_eq!(hit.unwrap().0, 0); // matched "alpha"

        // No overlap
        let miss = state.match_archetype(&[(99, 0.9)]);
        assert!(miss.is_none());
    }

    #[test]
    fn test_decay() {
        let mut state = state_of(vec![
            fixture(1, "strong", (0.0, 0.0, 0.0), &[0, 1, 2], 5.0, 10, (1000, 2000)),
            // weak, few reinforcements
            fixture(2, "weak", (0.5, 0.5, 0.5), &[3, 4, 5], 0.05, 1, (1000, 1000)),
        ]);

        state.decay();

        // Strong archetype survives
        assert_eq!(state.archetypes.len(), 1);
        assert_eq!(state.archetypes[0].id, 1);
    }

    #[test]
    fn test_save_load_roundtrip() {
        let tmp = tempfile::tempdir().expect("create temp dir");
        let dir = tmp.path();

        let state = state_of(vec![fixture(
            1,
            "test-archetype",
            (0.1, 0.2, 0.3),
            &[0, 5, 10],
            2.5,
            7,
            (12345, 67890),
        )]);

        state.save(dir).expect("save");

        let loaded = ArchetypeState::load_or_init(dir);
        assert_eq!(loaded.archetypes.len(), 1);

        let restored = &loaded.archetypes[0];
        assert_eq!(restored.id, 1);
        assert!((restored.centroid.0 - 0.1).abs() < 0.001);
        assert_eq!(restored.members, vec![0, 5, 10]);
        assert_eq!(restored.label, "test-archetype");
        assert_eq!(loaded.next_id, 2);
    }

    #[test]
    fn test_detect_no_field() {
        let resonance = ResonanceState {
            instance_id: 42,
            outgoing: Vec::new(),
            incoming: Vec::new(),
            field: HashMap::new(), // empty field
        };

        let hebb = hebbian::HebbianState {
            activations: vec![hebbian::ActivationRecord::default(); 5],
            coactivations: HashMap::new(),
            fingerprints: Vec::new(),
        };

        let headers = vec![(0.1, 0.2, 0.3); 5];
        let texts: Vec<&str> = vec!["a"; 5];

        let mut state = state_of(Vec::new());

        let emerged = state.detect(&resonance, &hebb, &headers, &texts);
        assert_eq!(emerged, 0);
    }

    #[test]
    fn test_stats() {
        let state = state_of(vec![
            fixture(1, "alpha", (0.0, 0.0, 0.0), &[0, 1, 2], 3.0, 5, (1000, 2000)),
            fixture(2, "beta", (0.5, 0.5, 0.5), &[3, 4], 1.0, 2, (1500, 1800)),
        ]);

        let stats = state.stats();
        assert_eq!(stats.archetype_count, 2);
        assert_eq!(stats.total_members, 5);
        assert_eq!(stats.strongest_label.unwrap(), "alpha");
    }
}