Skip to main content

oris_evolution/gep/
memory_graph.rs

//! GEP Memory Graph - Causal memory for evolution decisions.
//!
//! The memory graph is an append-only JSONL file recording the causal chain
//! of evolution decisions, enabling experience reuse and path suppression.
5
6use super::content_hash::{compute_asset_id, AssetIdError};
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::fs::{File, OpenOptions};
11use std::io::{BufRead, BufReader, Write};
12use std::path::PathBuf;
13use std::sync::Mutex;
14
15/// Memory Graph Event kinds
16#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
17#[serde(rename_all = "snake_case")]
18pub enum MemoryEventKind {
19    /// A signal was detected
20    Signal,
21    /// A hypothesis was formed
22    Hypothesis,
23    /// An attempt was made
24    Attempt,
25    /// An outcome was observed
26    Outcome,
27    /// A confidence edge between nodes
28    ConfidenceEdge,
29    /// Gene selection
30    GeneSelected,
31    /// Capsule created
32    CapsuleCreated,
33}
34
35/// Memory Graph Event - an entry in the causal memory graph
36#[derive(Clone, Debug, Serialize, Deserialize)]
37pub struct MemoryGraphEvent {
38    /// Event type
39    #[serde(rename = "type")]
40    pub event_type: String,
41    /// Kind of event
42    pub kind: MemoryEventKind,
43    /// Unique ID
44    pub id: String,
45    /// ISO 8601 timestamp
46    pub ts: String,
47    /// Signal snapshot (conditional)
48    #[serde(default)]
49    pub signal: Option<serde_json::Value>,
50    /// Gene reference (conditional)
51    #[serde(default)]
52    pub gene: Option<GeneRef>,
53    /// Outcome (conditional)
54    #[serde(default)]
55    pub outcome: Option<OutcomeRef>,
56    /// Hypothesis (conditional)
57    #[serde(default)]
58    pub hypothesis: Option<HypothesisRef>,
59    /// Parent event ID for chaining
60    #[serde(default)]
61    pub parent: Option<String>,
62}
63
64impl MemoryGraphEvent {
65    /// Create a new signal event
66    pub fn signal(id: String, signal_data: serde_json::Value) -> Self {
67        Self {
68            event_type: "MemoryGraphEvent".to_string(),
69            kind: MemoryEventKind::Signal,
70            id,
71            ts: Utc::now().to_rfc3339(),
72            signal: Some(signal_data),
73            gene: None,
74            outcome: None,
75            hypothesis: None,
76            parent: None,
77        }
78    }
79
80    /// Create a new hypothesis event
81    pub fn hypothesis(id: String, hypothesis: HypothesisRef, parent: Option<String>) -> Self {
82        Self {
83            event_type: "MemoryGraphEvent".to_string(),
84            kind: MemoryEventKind::Hypothesis,
85            id,
86            ts: Utc::now().to_rfc3339(),
87            signal: None,
88            gene: None,
89            outcome: None,
90            hypothesis: Some(hypothesis),
91            parent,
92        }
93    }
94
95    /// Create an outcome event
96    pub fn outcome(id: String, outcome: OutcomeRef, parent: Option<String>) -> Self {
97        Self {
98            event_type: "MemoryGraphEvent".to_string(),
99            kind: MemoryEventKind::Outcome,
100            id,
101            ts: Utc::now().to_rfc3339(),
102            signal: None,
103            gene: None,
104            outcome: Some(outcome),
105            hypothesis: None,
106            parent,
107        }
108    }
109
110    /// Create a gene selection event
111    pub fn gene_selected(id: String, gene: GeneRef, parent: Option<String>) -> Self {
112        Self {
113            event_type: "MemoryGraphEvent".to_string(),
114            kind: MemoryEventKind::GeneSelected,
115            id,
116            ts: Utc::now().to_rfc3339(),
117            signal: None,
118            gene: Some(gene),
119            outcome: None,
120            hypothesis: None,
121            parent,
122        }
123    }
124
125    /// Create a capsule event
126    pub fn capsule_created(id: String, capsule_id: String, parent: Option<String>) -> Self {
127        Self {
128            event_type: "MemoryGraphEvent".to_string(),
129            kind: MemoryEventKind::CapsuleCreated,
130            id,
131            ts: Utc::now().to_rfc3339(),
132            signal: None,
133            gene: None,
134            outcome: None,
135            hypothesis: None,
136            parent,
137        }
138    }
139}
140
141/// Reference to a gene
142#[derive(Clone, Debug, Serialize, Deserialize)]
143pub struct GeneRef {
144    pub id: String,
145    pub category: Option<String>,
146}
147
148/// Reference to an outcome
149#[derive(Clone, Debug, Serialize, Deserialize)]
150pub struct OutcomeRef {
151    pub status: String,
152    pub score: f32,
153    pub note: Option<String>,
154}
155
156/// Reference to a hypothesis
157#[derive(Clone, Debug, Serialize, Deserialize)]
158pub struct HypothesisRef {
159    pub id: String,
160    pub text: String,
161    pub predicted_outcome: Option<String>,
162}
163
164/// Historical (signal, gene) -> outcome mapping for advice
165#[derive(Clone, Debug, Serialize, Deserialize)]
166pub struct SignalGeneOutcome {
167    pub signal_pattern: String,
168    pub gene_id: String,
169    pub attempts: u32,
170    pub successes: u32,
171    pub success_rate: f32,
172    pub last_attempt: Option<String>,
173    /// Laplace-smoothed success probability
174    pub smoothed_probability: f32,
175    /// Weight based on age (exponential decay)
176    pub weight: f32,
177    /// Final value = probability * weight
178    pub value: f32,
179}
180
181impl SignalGeneOutcome {
182    /// Compute with Laplace smoothing and age-based decay
183    pub fn compute(successes: u32, total: u32, age_days: f32, half_life_days: f32) -> Self {
184        let attempts = total.max(1);
185        let successes = successes.min(attempts);
186
187        // Laplace smoothing: p = (successes + 1) / (total + 2)
188        let p = (successes as f32 + 1.0) / (attempts as f32 + 2.0);
189
190        // Exponential decay: weight = 0.5 ^ (age_days / half_life_days)
191        let weight = 0.5_f32.powf(age_days / half_life_days);
192
193        let value = p * weight;
194
195        Self {
196            signal_pattern: String::new(),
197            gene_id: String::new(),
198            attempts,
199            successes,
200            success_rate: successes as f32 / attempts as f32,
201            last_attempt: None,
202            smoothed_probability: p,
203            weight,
204            value,
205        }
206    }
207}
208
209/// Memory Graph - manages causal memory for evolution
210pub struct MemoryGraph {
211    events: Vec<MemoryGraphEvent>,
212    /// Signal -> Gene -> Outcome statistics
213    statistics: HashMap<String, HashMap<String, SignalGeneOutcome>>,
214    /// Banned (signal_pattern, gene_id) pairs
215    banned: Vec<(String, String)>,
216    /// Half-life for confidence decay (default 30 days)
217    half_life_days: f32,
218    /// Ban threshold (default 0.18)
219    ban_threshold: f32,
220    /// Jaccard similarity threshold (default 0.34)
221    similarity_threshold: f32,
222}
223
224impl Default for MemoryGraph {
225    fn default() -> Self {
226        Self::new()
227    }
228}
229
230impl MemoryGraph {
231    /// Create a new memory graph
232    pub fn new() -> Self {
233        Self {
234            events: Vec::new(),
235            statistics: HashMap::new(),
236            banned: Vec::new(),
237            half_life_days: 30.0,
238            ban_threshold: 0.18,
239            similarity_threshold: 0.34,
240        }
241    }
242
243    /// Create with custom config
244    pub fn with_config(half_life_days: f32, ban_threshold: f32, similarity_threshold: f32) -> Self {
245        Self {
246            events: Vec::new(),
247            statistics: HashMap::new(),
248            banned: Vec::new(),
249            half_life_days,
250            ban_threshold,
251            similarity_threshold,
252        }
253    }
254
255    /// Append an event
256    pub fn append(&mut self, event: MemoryGraphEvent) {
257        // Update statistics based on event type
258        if let Some(outcome) = &event.outcome {
259            if let Some(gene) = &event.gene {
260                // Update statistics for (signal, gene) -> outcome
261                if let Some(signal_data) = &event.signal {
262                    if let Some(signal_str) = signal_data.as_str() {
263                        self.update_statistics(signal_str, &gene.id, outcome);
264                    }
265                }
266            }
267        }
268
269        self.events.push(event);
270    }
271
272    /// Update statistics for a signal-gene pair
273    fn update_statistics(&mut self, signal: &str, gene_id: &str, outcome: &OutcomeRef) {
274        let stats_by_gene = self.statistics.entry(signal.to_string()).or_default();
275
276        let entry = stats_by_gene
277            .entry(gene_id.to_string())
278            .or_insert_with(|| SignalGeneOutcome::compute(0, 0, 0.0, self.half_life_days));
279
280        entry.attempts += 1;
281        if outcome.status == "success" {
282            entry.successes += 1;
283        }
284        entry.success_rate = entry.successes as f32 / entry.attempts as f32;
285        entry.last_attempt = Some(Utc::now().to_rfc3339());
286
287        // Recompute with age = 0 (fresh data)
288        let updated =
289            SignalGeneOutcome::compute(entry.successes, entry.attempts, 0.0, self.half_life_days);
290        entry.smoothed_probability = updated.smoothed_probability;
291        entry.weight = updated.weight;
292        entry.value = updated.value;
293
294        // Check ban threshold
295        if entry.attempts >= 2 && entry.value < self.ban_threshold {
296            self.banned.push((signal.to_string(), gene_id.to_string()));
297        }
298    }
299
300    /// Get advice for gene selection
301    pub fn get_advice(&self, signals: &[String]) -> GeneAdvice {
302        let mut gene_scores: HashMap<String, f32> = HashMap::new();
303        let mut preferred: Vec<String> = Vec::new();
304        let mut banned: Vec<String> = Vec::new();
305
306        for signal in signals {
307            if let Some(stats_by_gene) = self.statistics.get(signal) {
308                for (gene_id, stat) in stats_by_gene {
309                    let score = gene_scores.entry(gene_id.clone()).or_insert(0.0);
310                    *score += stat.value;
311
312                    // Check if banned
313                    if self.banned.iter().any(|(s, g)| s == signal && g == gene_id) {
314                        banned.push(gene_id.clone());
315                    } else if stat.value > 0.5 {
316                        preferred.push(gene_id.clone());
317                    }
318                }
319            }
320        }
321
322        GeneAdvice {
323            scores: gene_scores,
324            preferred,
325            banned,
326        }
327    }
328
329    /// Check if a (signal, gene) pair is banned
330    pub fn is_banned(&self, signal: &str, gene_id: &str) -> bool {
331        self.banned.iter().any(|(s, g)| s == signal && g == gene_id)
332    }
333
334    /// Find similar signal patterns using Jaccard similarity
335    pub fn find_similar(&self, signals: &[String]) -> Vec<(String, f32)> {
336        let mut similarities = Vec::new();
337
338        for (pattern, _) in &self.statistics {
339            // Simple token-based similarity using strings
340            let pattern_tokens: std::collections::HashSet<&str> = pattern.split('_').collect();
341            let signal_tokens: std::collections::HashSet<&str> =
342                signals.iter().flat_map(|s| s.split('_')).collect();
343
344            let intersection: usize = pattern_tokens.intersection(&signal_tokens).count();
345            let union = pattern_tokens.union(&signal_tokens).count();
346
347            let similarity = if union > 0 {
348                intersection as f32 / union as f32
349            } else {
350                0.0
351            };
352
353            if similarity >= self.similarity_threshold {
354                similarities.push((pattern.clone(), similarity));
355            }
356        }
357
358        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
359        similarities
360    }
361
362    /// Get events for a specific session
363    pub fn get_session_events(&self, session_id: &str) -> Vec<&MemoryGraphEvent> {
364        self.events
365            .iter()
366            .filter(|e| {
367                e.hypothesis
368                    .as_ref()
369                    .and_then(|h| h.id.split('_').nth(1))
370                    .map(|s| s == session_id)
371                    .unwrap_or(false)
372            })
373            .collect()
374    }
375
376    /// Get total event count
377    pub fn len(&self) -> usize {
378        self.events.len()
379    }
380
381    /// Check if empty
382    pub fn is_empty(&self) -> bool {
383        self.events.is_empty()
384    }
385}
386
387/// Advice for gene selection based on memory
388#[derive(Clone, Debug, Default, Serialize, Deserialize)]
389pub struct GeneAdvice {
390    /// Gene ID -> aggregated score
391    pub scores: HashMap<String, f32>,
392    /// Preferred gene IDs (value > 0.5)
393    pub preferred: Vec<String>,
394    /// Banned gene IDs
395    pub banned: Vec<String>,
396}
397
398/// File-backed Memory Graph
399pub struct FileMemoryGraph {
400    path: PathBuf,
401    graph: Mutex<MemoryGraph>,
402}
403
404impl FileMemoryGraph {
405    /// Open or create a memory graph file
406    pub fn open<P: Into<PathBuf>>(path: P) -> std::io::Result<Self> {
407        let path = path.into();
408
409        let graph = if path.exists() {
410            let file = File::open(&path)?;
411            let reader = BufReader::new(file);
412            let mut events = Vec::new();
413
414            for line in reader.lines() {
415                let line = line?;
416                if !line.trim().is_empty() {
417                    let event: MemoryGraphEvent = serde_json::from_str(&line)
418                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
419                    events.push(event);
420                }
421            }
422
423            let mut g = MemoryGraph::new();
424            for event in events {
425                g.append(event);
426            }
427            g
428        } else {
429            MemoryGraph::new()
430        };
431
432        Ok(Self {
433            path,
434            graph: Mutex::new(graph),
435        })
436    }
437
438    /// Append an event and persist to file
439    pub fn append(&self, event: MemoryGraphEvent) -> std::io::Result<()> {
440        let mut file = OpenOptions::new()
441            .create(true)
442            .append(true)
443            .open(&self.path)?;
444
445        let line = serde_json::to_string(&event)?;
446        file.write_all(line.as_bytes())?;
447        file.write_all(b"\n")?;
448
449        let mut graph = self.graph.lock().unwrap();
450        graph.append(event);
451
452        Ok(())
453    }
454
455    /// Get advice
456    pub fn get_advice(&self, signals: &[String]) -> GeneAdvice {
457        let graph = self.graph.lock().unwrap();
458        graph.get_advice(signals)
459    }
460
461    /// Check if banned
462    pub fn is_banned(&self, signal: &str, gene_id: &str) -> bool {
463        let graph = self.graph.lock().unwrap();
464        graph.is_banned(signal, gene_id)
465    }
466
467    /// Get the underlying graph
468    pub fn graph(&self) -> std::sync::MutexGuard<'_, MemoryGraph> {
469        self.graph.lock().unwrap()
470    }
471}
472
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an event that carries signal, gene and outcome together, which
    /// is the shape the graph turns into (signal, gene) statistics.
    fn attempt(id: &str, signal: &str, gene_id: &str, status: &str) -> MemoryGraphEvent {
        let outcome = OutcomeRef {
            status: status.to_string(),
            score: if status == "success" { 1.0 } else { 0.0 },
            note: None,
        };
        let mut event = MemoryGraphEvent::outcome(id.to_string(), outcome, None);
        event.signal = Some(serde_json::json!(signal));
        event.gene = Some(GeneRef {
            id: gene_id.to_string(),
            category: None,
        });
        event
    }

    #[test]
    fn test_memory_graph_append() {
        let mut graph = MemoryGraph::new();

        let event =
            MemoryGraphEvent::signal("sig_001".to_string(), serde_json::json!("timeout_error"));
        graph.append(event);

        assert_eq!(graph.len(), 1);
        assert!(!graph.is_empty());
    }

    #[test]
    fn test_signal_gene_outcome() {
        let stat = SignalGeneOutcome::compute(8, 10, 0.0, 30.0);

        assert_eq!(stat.attempts, 10);
        assert_eq!(stat.successes, 8);
        assert!((stat.smoothed_probability - 0.75).abs() < 0.01);
        assert!((stat.weight - 1.0).abs() < 0.01);

        // One half-life later the weight, and with it the value, is halved.
        let aged = SignalGeneOutcome::compute(8, 10, 30.0, 30.0);
        assert!((aged.weight - 0.5).abs() < 0.01);
        assert!((aged.value - 0.375).abs() < 0.01);
    }

    #[test]
    fn test_ban_threshold() {
        let mut graph = MemoryGraph::with_config(30.0, 0.18, 0.34);
        assert!(!graph.is_banned("test_signal", "gene_a"));

        // A single failure must not be enough to ban a pair.
        graph.append(attempt("out_0", "test_signal", "gene_a", "failed"));
        assert!(!graph.is_banned("test_signal", "gene_a"));

        // Repeated failures push the smoothed value below the threshold.
        for i in 1..6 {
            graph.append(attempt(&format!("out_{}", i), "test_signal", "gene_a", "failed"));
        }

        assert_eq!(graph.len(), 6);
        assert!(graph.is_banned("test_signal", "gene_a"));
        // The ban applies to that exact pair only.
        assert!(!graph.is_banned("test_signal", "gene_b"));
        assert!(!graph.is_banned("other_signal", "gene_a"));

        let advice = graph.get_advice(&["test_signal".to_string()]);
        assert!(advice.banned.contains(&"gene_a".to_string()));
        assert!(!advice.preferred.contains(&"gene_a".to_string()));
    }

    #[test]
    fn test_gene_advice() {
        let mut graph = MemoryGraph::new();

        let advice = graph.get_advice(&["timeout".to_string()]);
        assert!(advice.scores.is_empty());
        assert!(advice.preferred.is_empty());
        assert!(advice.banned.is_empty());

        for i in 0..6 {
            graph.append(attempt(&format!("out_{}", i), "timeout", "gene_good", "success"));
        }

        let advice = graph.get_advice(&["timeout".to_string()]);
        assert!(advice.scores["gene_good"] > 0.5);
        assert!(advice.preferred.contains(&"gene_good".to_string()));
        assert!(advice.banned.is_empty());
    }

    #[test]
    fn test_find_similar() {
        let mut graph = MemoryGraph::new();

        // A bare signal event creates no statistics, so there is nothing to match.
        graph.append(MemoryGraphEvent::signal(
            "sig_001".to_string(),
            serde_json::json!("connection_timeout"),
        ));
        assert!(graph.find_similar(&["connection_timeout".to_string()]).is_empty());

        graph.append(attempt("out_001", "connection_timeout", "gene_a", "success"));

        // {connection, timeout} vs {timeout}: 1 shared of 2 total = 0.5 >= 0.34.
        let similar = graph.find_similar(&["timeout".to_string()]);
        assert_eq!(similar.len(), 1);
        assert_eq!(similar[0].0, "connection_timeout");
        assert!((similar[0].1 - 0.5).abs() < 1e-6);

        // No shared token, so nothing is similar.
        assert!(graph.find_similar(&["disk_full".to_string()]).is_empty());
    }

    #[test]
    fn test_memory_graph_default() {
        let graph = MemoryGraph::default();
        assert!(graph.is_empty());
        assert_eq!(graph.half_life_days, 30.0);
        assert_eq!(graph.ban_threshold, 0.18);
    }
}
543}