1use std::collections::HashMap;
14use std::fs;
15use std::path::Path;
16
17use crate::hebbian;
18use crate::resonance::ResonanceState;
19
/// Minimum resonance-field energy a quantised cell needs before `detect` treats it as a seed.
const SEED_THRESHOLD: f32 = 2.0;
/// Fewest activated blocks that must lie near a seed cell for a new archetype to be created.
const MIN_MEMBERS: usize = 3;
/// Population ceiling that stops `detect` from creating further archetypes.
const MAX_ARCHETYPES: usize = 100;
/// Euclidean radius used both to attach a seed cell to an existing archetype and to gather members.
const CLUSTER_RADIUS: f32 = 0.15;
/// Factor every archetype's strength is multiplied by on each `decay` call.
const ARCHETYPE_DECAY: f32 = 0.98;
32
/// A persistent cluster of blocks that emerged around a hot resonance-field cell.
#[derive(Debug, Clone)]
pub struct Archetype {
    /// Identifier handed out from the owning state's id counter when the archetype is created.
    pub id: u32,
    /// Cluster centre as `(x, y, z)`; on creation this is the mean of the members' header
    /// coordinates.
    pub centroid: (f32, f32, f32),
    /// Block indices (positions in the `headers` slice given to `detect`) belonging to the cluster.
    pub members: Vec<u32>,
    /// Accumulated strength: seeded from field energy, raised by reinforcement, lowered by decay.
    pub strength: f32,
    /// Number of times the archetype was reinforced (starts at 1 on creation).
    pub reinforcement_count: u32,
    /// Timestamp taken from `hebbian::now_epoch_ms_pub()` at creation
    /// (presumably epoch milliseconds; confirm against that helper).
    pub emerged_ms: u64,
    /// Timestamp of the most recent reinforcement, from the same clock as `emerged_ms`.
    pub last_reinforced_ms: u64,
    /// Human-readable label built from the most frequent words of the member blocks' texts.
    pub label: String,
}
55
56pub struct ArchetypeState {
58 pub archetypes: Vec<Archetype>,
59 next_id: u32,
60}
61
62impl ArchetypeState {
63 pub fn load_or_init(output_dir: &Path) -> Self {
65 load_archetypes(output_dir).unwrap_or_else(|| Self {
66 archetypes: Vec::new(),
67 next_id: 1,
68 })
69 }
70
71 pub fn detect(
74 &mut self,
75 resonance: &ResonanceState,
76 hebb: &hebbian::HebbianState,
77 headers: &[(f32, f32, f32)],
78 block_texts: &[&str],
79 ) -> usize {
80 let now_ms = hebbian::now_epoch_ms_pub();
81 let mut new_count = 0;
82
83 let hot_cells: Vec<((i16, i16, i16), f32)> = resonance
85 .field
86 .iter()
87 .filter(|(_, &v)| v >= SEED_THRESHOLD)
88 .map(|(&k, &v)| (k, v))
89 .collect();
90
91 for ((qx, qy, qz), field_energy) in hot_cells {
92 let cx = qx as f32 / 20.0;
94 let cy = qy as f32 / 20.0;
95 let cz = qz as f32 / 20.0;
96
97 if self
99 .archetypes
100 .iter()
101 .any(|a| spatial_dist(a.centroid, (cx, cy, cz)) < CLUSTER_RADIUS)
102 {
103 if let Some(a) = self.archetypes.iter_mut().min_by(|a, b| {
105 spatial_dist(a.centroid, (cx, cy, cz))
106 .partial_cmp(&spatial_dist(b.centroid, (cx, cy, cz)))
107 .unwrap()
108 }) {
109 a.strength += field_energy * 0.1;
110 a.reinforcement_count += 1;
111 a.last_reinforced_ms = now_ms;
112 }
113 continue;
114 }
115
116 let mut members = Vec::new();
118 for (idx, (bx, by, bz)) in headers.iter().enumerate() {
119 let dx = cx - bx;
120 let dy = cy - by;
121 let dz = cz - bz;
122 let dist = (dx * dx + dy * dy + dz * dz).sqrt();
123
124 if dist < CLUSTER_RADIUS {
125 if idx < hebb.activations.len() && hebb.activations[idx].activation_count > 0 {
127 members.push(idx as u32);
128 }
129 }
130 }
131
132 if members.len() < MIN_MEMBERS {
133 continue;
134 }
135
136 let (sum_x, sum_y, sum_z) = members.iter().fold((0.0f32, 0.0f32, 0.0f32), |acc, &m| {
138 let (x, y, z) = headers[m as usize];
139 (acc.0 + x, acc.1 + y, acc.2 + z)
140 });
141 let n = members.len() as f32;
142 let centroid = (sum_x / n, sum_y / n, sum_z / n);
143
144 let label = generate_label(&members, block_texts);
146
147 let archetype = Archetype {
148 id: self.next_id,
149 centroid,
150 members,
151 strength: field_energy,
152 reinforcement_count: 1,
153 emerged_ms: now_ms,
154 last_reinforced_ms: now_ms,
155 label,
156 };
157
158 self.archetypes.push(archetype);
159 self.next_id += 1;
160 new_count += 1;
161
162 if self.archetypes.len() >= MAX_ARCHETYPES {
163 break;
164 }
165 }
166
167 new_count
168 }
169
170 pub fn reinforce(&mut self, activated_blocks: &[(u32, f32)]) {
173 let now_ms = hebbian::now_epoch_ms_pub();
174 let activated_set: HashMap<u32, f32> = activated_blocks.iter().copied().collect();
175
176 for archetype in &mut self.archetypes {
177 let overlap: f32 = archetype
178 .members
179 .iter()
180 .filter_map(|m| activated_set.get(m))
181 .sum();
182
183 if overlap > 0.0 {
184 archetype.strength += overlap * 0.05;
185 archetype.reinforcement_count += 1;
186 archetype.last_reinforced_ms = now_ms;
187 }
188 }
189 }
190
191 pub fn decay(&mut self) {
193 for a in &mut self.archetypes {
194 a.strength *= ARCHETYPE_DECAY;
195 }
196 self.archetypes
198 .retain(|a| a.strength > 0.1 || a.reinforcement_count > 5);
199 }
200
201 pub fn match_archetype(&self, activated_blocks: &[(u32, f32)]) -> Option<(usize, f32)> {
203 let activated_set: HashMap<u32, f32> = activated_blocks.iter().copied().collect();
204
205 let mut best: Option<(usize, f32)> = None;
206 for (i, archetype) in self.archetypes.iter().enumerate() {
207 let overlap: f32 = archetype
208 .members
209 .iter()
210 .filter_map(|m| activated_set.get(m))
211 .sum();
212
213 let coverage = if archetype.members.is_empty() {
214 0.0
215 } else {
216 let matching = archetype
217 .members
218 .iter()
219 .filter(|m| activated_set.contains_key(m))
220 .count();
221 matching as f32 / archetype.members.len() as f32
222 };
223
224 let score = overlap * coverage * archetype.strength;
225 if score > 0.0 && (best.is_none() || score > best.unwrap().1) {
226 best = Some((i, score));
227 }
228 }
229 best
230 }
231
232 pub fn stats(&self) -> ArchetypeStats {
234 let total_members: usize = self.archetypes.iter().map(|a| a.members.len()).sum();
235 let strongest = self
236 .archetypes
237 .iter()
238 .max_by(|a, b| a.strength.partial_cmp(&b.strength).unwrap());
239
240 ArchetypeStats {
241 archetype_count: self.archetypes.len(),
242 total_members,
243 strongest_label: strongest.map(|a| a.label.clone()),
244 strongest_strength: strongest.map(|a| a.strength),
245 }
246 }
247
248 pub fn save(&self, output_dir: &Path) -> Result<(), String> {
250 save_archetypes(output_dir, self)
251 }
252}
253
/// Snapshot of the archetype population, as produced by `ArchetypeState::stats`.
///
/// Derives `Debug`, `Clone` and `PartialEq` so callers can log, copy and compare snapshots.
#[derive(Debug, Clone, PartialEq)]
pub struct ArchetypeStats {
    /// Number of archetypes in the state.
    pub archetype_count: usize,
    /// Sum of the member counts of all archetypes (a block in two archetypes counts twice).
    pub total_members: usize,
    /// Label of the strongest archetype, or `None` when there is none.
    pub strongest_label: Option<String>,
    /// Strength of the strongest archetype, or `None` when there is none.
    pub strongest_strength: Option<f32>,
}
260
/// Euclidean distance between the 3-D points `a` and `b`.
fn spatial_dist(a: (f32, f32, f32), b: (f32, f32, f32)) -> f32 {
    let (ax, ay, az) = a;
    let (bx, by, bz) = b;
    let (dx, dy, dz) = (ax - bx, ay - by, az - bz);

    let squared = dx * dx + dy * dy + dz * dz;
    squared.sqrt()
}
269
/// Builds a short label from the most frequent words in the member blocks' texts.
///
/// `members` are indices into `block_texts`; indices past the end are ignored. Each text is
/// split on whitespace and every word is stripped of leading/trailing non-alphanumeric
/// characters. Words of at most two bytes and common English stopwords (compared
/// case-insensitively) are skipped; the remaining words are counted case-sensitively.
///
/// Returns up to three words joined by `-`, most frequent first. Words with equal counts are
/// ordered alphabetically so the label is deterministic (ranking straight out of the hash map
/// would make ties depend on its iteration order). Returns an empty string when no word
/// qualifies.
fn generate_label(members: &[u32], block_texts: &[&str]) -> String {
    const STOPWORDS: &[&str] = &[
        "a", "the", "is", "of", "and", "to", "in", "it", "on", "for", "that", "this", "with",
        "was", "are", "be", "has", "had", "not", "but", "from", "or", "an", "at", "by",
    ];

    let mut word_counts: HashMap<&str, usize> = HashMap::new();

    let texts = members
        .iter()
        .filter_map(|&idx| block_texts.get(idx as usize).copied());

    for text in texts {
        for raw in text.split_whitespace() {
            let word = raw.trim_matches(|c: char| !c.is_alphanumeric());
            if word.len() > 2 && !STOPWORDS.contains(&word.to_lowercase().as_str()) {
                *word_counts.entry(word).or_insert(0) += 1;
            }
        }
    }

    // Highest count first; alphabetical order breaks ties so the result is reproducible.
    let mut ranked: Vec<(&str, usize)> = word_counts.into_iter().collect();
    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

    ranked
        .iter()
        .take(3)
        .map(|&(word, _)| word)
        .collect::<Vec<&str>>()
        .join("-")
}
300
301fn load_archetypes(output_dir: &Path) -> Option<ArchetypeState> {
314 let path = output_dir.join("archetypes.bin");
315 let data = fs::read(&path).ok()?;
316 if data.len() < 12 || &data[0..4] != b"ARC1" {
317 return None;
318 }
319
320 let next_id = u32::from_le_bytes(data[4..8].try_into().unwrap());
321 let count = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize;
322
323 let mut pos = 12;
324 let mut archetypes = Vec::with_capacity(count);
325
326 for _ in 0..count {
327 if pos + 40 > data.len() {
328 break;
329 }
330
331 let id = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
332 let cx = f32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
333 let cy = f32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap());
334 let cz = f32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap());
335 let strength = f32::from_le_bytes(data[pos + 16..pos + 20].try_into().unwrap());
336 let reinforcement_count = u32::from_le_bytes(data[pos + 20..pos + 24].try_into().unwrap());
337 let emerged_ms = u64::from_le_bytes(data[pos + 24..pos + 32].try_into().unwrap());
338 let last_reinforced_ms = u64::from_le_bytes(data[pos + 32..pos + 40].try_into().unwrap());
339 pos += 40;
340
341 if pos + 2 > data.len() {
342 break;
343 }
344 let member_count = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
345 pos += 2;
346
347 let mut members = Vec::with_capacity(member_count);
348 for _ in 0..member_count {
349 if pos + 4 > data.len() {
350 break;
351 }
352 members.push(u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()));
353 pos += 4;
354 }
355
356 if pos + 2 > data.len() {
357 break;
358 }
359 let label_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
360 pos += 2;
361
362 let label = if pos + label_len <= data.len() {
363 let s = String::from_utf8_lossy(&data[pos..pos + label_len]).to_string();
364 pos += label_len;
365 s
366 } else {
367 String::new()
368 };
369
370 archetypes.push(Archetype {
371 id,
372 centroid: (cx, cy, cz),
373 members,
374 strength,
375 reinforcement_count,
376 emerged_ms,
377 last_reinforced_ms,
378 label,
379 });
380 }
381
382 Some(ArchetypeState {
383 archetypes,
384 next_id,
385 })
386}
387
388fn save_archetypes(output_dir: &Path, state: &ArchetypeState) -> Result<(), String> {
389 let path = output_dir.join("archetypes.bin");
390 let mut buf = Vec::new();
391
392 buf.extend_from_slice(b"ARC1");
393 buf.extend_from_slice(&state.next_id.to_le_bytes());
394 buf.extend_from_slice(&(state.archetypes.len() as u32).to_le_bytes());
395
396 for a in &state.archetypes {
397 buf.extend_from_slice(&a.id.to_le_bytes());
398 buf.extend_from_slice(&a.centroid.0.to_le_bytes());
399 buf.extend_from_slice(&a.centroid.1.to_le_bytes());
400 buf.extend_from_slice(&a.centroid.2.to_le_bytes());
401 buf.extend_from_slice(&a.strength.to_le_bytes());
402 buf.extend_from_slice(&a.reinforcement_count.to_le_bytes());
403 buf.extend_from_slice(&a.emerged_ms.to_le_bytes());
404 buf.extend_from_slice(&a.last_reinforced_ms.to_le_bytes());
405
406 buf.extend_from_slice(&(a.members.len() as u16).to_le_bytes());
407 for &m in &a.members {
408 buf.extend_from_slice(&m.to_le_bytes());
409 }
410
411 let label_bytes = a.label.as_bytes();
412 buf.extend_from_slice(&(label_bytes.len() as u16).to_le_bytes());
413 buf.extend_from_slice(label_bytes);
414 }
415
416 fs::write(&path, &buf).map_err(|e| format!("write archetypes.bin: {}", e))
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Archetype` fixture; `times` is `(emerged_ms, last_reinforced_ms)`.
    fn fixture(
        id: u32,
        centroid: (f32, f32, f32),
        members: &[u32],
        strength: f32,
        reinforcement_count: u32,
        times: (u64, u64),
        label: &str,
    ) -> Archetype {
        Archetype {
            id,
            centroid,
            members: members.to_vec(),
            strength,
            reinforcement_count,
            emerged_ms: times.0,
            last_reinforced_ms: times.1,
            label: label.to_string(),
        }
    }

    /// Wraps fixtures into a state with the given id counter.
    fn state_of(archetypes: Vec<Archetype>, next_id: u32) -> ArchetypeState {
        ArchetypeState {
            archetypes,
            next_id,
        }
    }

    #[test]
    fn test_spatial_dist() {
        let unit_step = spatial_dist((0.0, 0.0, 0.0), (1.0, 0.0, 0.0));
        assert!((unit_step - 1.0).abs() < 0.001);

        let same_point = spatial_dist((0.1, 0.2, 0.3), (0.1, 0.2, 0.3));
        assert!(same_point < 0.001);
    }

    #[test]
    fn test_generate_label() {
        let texts = [
            "hello world Rust",
            "Rust memory system",
            "Rust binary format",
        ];
        let members = vec![0, 1, 2];

        // "Rust" appears in every text, so it must be part of the label.
        let label = generate_label(&members, &texts);
        assert!(label.contains("Rust"));
    }

    #[test]
    fn test_reinforce() {
        let mut state = state_of(
            vec![fixture(
                1,
                (0.1, 0.2, 0.3),
                &[0, 1, 2],
                1.0,
                1,
                (1000, 1000),
                "test",
            )],
            2,
        );

        // Blocks 0 and 1 are members; block 5 is not.
        state.reinforce(&[(0, 0.9), (1, 0.5), (5, 0.3)]);

        let reinforced = &state.archetypes[0];
        assert!(reinforced.strength > 1.0);
        assert_eq!(reinforced.reinforcement_count, 2);
    }

    #[test]
    fn test_match_archetype() {
        let state = state_of(
            vec![
                fixture(
                    1,
                    (0.1, 0.2, 0.3),
                    &[0, 1, 2],
                    2.0,
                    5,
                    (1000, 2000),
                    "alpha",
                ),
                fixture(
                    2,
                    (0.5, 0.5, 0.5),
                    &[10, 11, 12],
                    1.0,
                    2,
                    (1500, 1800),
                    "beta",
                ),
            ],
            3,
        );

        // Full overlap with the first archetype selects index 0.
        let hit = state.match_archetype(&[(0, 0.9), (1, 0.7), (2, 0.5)]);
        assert!(hit.is_some());
        assert_eq!(hit.map(|(index, _)| index), Some(0));

        // A block that belongs to no archetype matches nothing.
        let miss = state.match_archetype(&[(99, 0.9)]);
        assert!(miss.is_none());
    }

    #[test]
    fn test_decay() {
        let mut state = state_of(
            vec![
                fixture(
                    1,
                    (0.0, 0.0, 0.0),
                    &[0, 1, 2],
                    5.0,
                    10,
                    (1000, 2000),
                    "strong",
                ),
                fixture(
                    2,
                    (0.5, 0.5, 0.5),
                    &[3, 4, 5],
                    0.05,
                    1,
                    (1000, 1000),
                    "weak",
                ),
            ],
            3,
        );

        state.decay();

        // Only the strong archetype survives pruning.
        assert_eq!(state.archetypes.len(), 1);
        assert_eq!(state.archetypes[0].id, 1);
    }

    #[test]
    fn test_save_load_roundtrip() {
        let tmp = tempfile::tempdir().expect("create temp dir");
        let dir = tmp.path();

        let state = state_of(
            vec![fixture(
                1,
                (0.1, 0.2, 0.3),
                &[0, 5, 10],
                2.5,
                7,
                (12345, 67890),
                "test-archetype",
            )],
            2,
        );

        state.save(dir).expect("save");

        let loaded = ArchetypeState::load_or_init(dir);
        assert_eq!(loaded.archetypes.len(), 1);

        let restored = &loaded.archetypes[0];
        assert_eq!(restored.id, 1);
        assert!((restored.centroid.0 - 0.1).abs() < 0.001);
        assert_eq!(restored.members, vec![0, 5, 10]);
        assert_eq!(restored.label, "test-archetype");
        assert_eq!(loaded.next_id, 2);
    }

    #[test]
    fn test_detect_no_field() {
        // An empty resonance field has no hot cells, so nothing can emerge.
        let resonance = ResonanceState {
            instance_id: 42,
            outgoing: Vec::new(),
            incoming: Vec::new(),
            field: HashMap::new(),
        };

        let hebb = hebbian::HebbianState {
            activations: vec![hebbian::ActivationRecord::default(); 5],
            coactivations: HashMap::new(),
            fingerprints: Vec::new(),
        };

        let headers = vec![(0.1, 0.2, 0.3); 5];
        let texts: Vec<&str> = vec!["a"; 5];

        let mut state = state_of(Vec::new(), 1);

        let emerged = state.detect(&resonance, &hebb, &headers, &texts);
        assert_eq!(emerged, 0);
    }

    #[test]
    fn test_stats() {
        let state = state_of(
            vec![
                fixture(
                    1,
                    (0.0, 0.0, 0.0),
                    &[0, 1, 2],
                    3.0,
                    5,
                    (1000, 2000),
                    "alpha",
                ),
                fixture(
                    2,
                    (0.5, 0.5, 0.5),
                    &[3, 4],
                    1.0,
                    2,
                    (1500, 1800),
                    "beta",
                ),
            ],
            3,
        );

        let stats = state.stats();
        assert_eq!(stats.archetype_count, 2);
        assert_eq!(stats.total_members, 5);
        assert_eq!(stats.strongest_label.unwrap(), "alpha");
    }
}