// aivcs_core/memory_context.rs
1//! Memory, context, and retrieval for EPIC5.
2//!
3//! Provides indexed retrieval over snapshots and memories, decision rationale
4//! capture, token-budgeted context assembly, and retention/compaction policies.
5
6use std::path::{Path, PathBuf};
7
8use chrono::{DateTime, Duration, Utc};
9use serde::{Deserialize, Serialize};
10
11use crate::domain::{AivcsError, Result};
12use oxidized_state::storage_traits::ContentDigest;
13
14// ---------------------------------------------------------------------------
15// Memory Index — Indexed retrieval over memories and run history
16// ---------------------------------------------------------------------------
17
/// A relevance-scored memory hit from an index query.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MemoryHit {
    /// Key under which the memory was stored.
    pub key: String,
    /// Memory payload text.
    pub content: String,
    /// Commit the memory originated from.
    pub commit_id: String,
    /// Relevance score in (0.0, 1.0]; zero-score entries are never returned.
    pub score: f64,
    /// Creation timestamp of the underlying memory entry.
    pub created_at: DateTime<Utc>,
}
27
/// Match strategy for memory queries.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MatchStrategy {
    /// Exact key match (scores 1.0 on match, 0.0 otherwise).
    Exact,
    /// Case-insensitive substring match: on key scores 0.8, on content 0.5.
    Substring,
    /// Keyword overlap scoring: fraction of query words found in key+content.
    Keyword,
}
39
/// Query parameters for memory index lookup.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryQuery {
    /// Text to match; interpretation depends on `strategy`.
    pub query_text: String,
    /// How `query_text` is matched and scored.
    pub strategy: MatchStrategy,
    /// Maximum number of hits returned.
    pub max_results: usize,
    /// Optional: restrict to memories from these commit IDs.
    pub scope_commits: Option<Vec<String>>,
}
49
50impl MemoryQuery {
51    pub fn keyword(text: impl Into<String>, max_results: usize) -> Self {
52        Self {
53            query_text: text.into(),
54            strategy: MatchStrategy::Keyword,
55            max_results,
56            scope_commits: None,
57        }
58    }
59
60    pub fn exact(key: impl Into<String>) -> Self {
61        Self {
62            query_text: key.into(),
63            strategy: MatchStrategy::Exact,
64            max_results: 1,
65            scope_commits: None,
66        }
67    }
68
69    pub fn scoped(mut self, commits: Vec<String>) -> Self {
70        self.scope_commits = Some(commits);
71        self
72    }
73}
74
/// In-process memory index backed by a flat list of `MemoryRecord`-like entries.
///
/// Queries scan all entries linearly, so this is intended for modest index sizes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryIndex {
    // Flat entry list; `query` performs a linear scan over it.
    entries: Vec<MemoryEntry>,
}
80
/// Indexed entry (lightweight projection of MemoryRecord).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryEntry {
    /// Storage key for the memory.
    pub key: String,
    /// Memory payload text.
    pub content: String,
    /// Commit the memory originated from.
    pub commit_id: String,
    /// Creation timestamp; used for recency ordering and age-based compaction.
    pub created_at: DateTime<Utc>,
}
89
90impl MemoryIndex {
91    pub fn new() -> Self {
92        Self {
93            entries: Vec::new(),
94        }
95    }
96
97    /// Ingest a batch of memory entries.
98    pub fn ingest(&mut self, entries: Vec<MemoryEntry>) {
99        self.entries.extend(entries);
100    }
101
102    /// Number of indexed entries.
103    pub fn len(&self) -> usize {
104        self.entries.len()
105    }
106
107    pub fn is_empty(&self) -> bool {
108        self.entries.is_empty()
109    }
110
111    /// Query the index and return scored hits.
112    pub fn query(&self, q: &MemoryQuery) -> Vec<MemoryHit> {
113        let candidates: Vec<&MemoryEntry> = self
114            .entries
115            .iter()
116            .filter(|e| match &q.scope_commits {
117                Some(commits) => commits.contains(&e.commit_id),
118                None => true,
119            })
120            .collect();
121
122        let mut scored: Vec<MemoryHit> = candidates
123            .into_iter()
124            .filter_map(|e| {
125                let score = match q.strategy {
126                    MatchStrategy::Exact => {
127                        if e.key == q.query_text {
128                            1.0
129                        } else {
130                            0.0
131                        }
132                    }
133                    MatchStrategy::Substring => {
134                        let q_lower = q.query_text.to_lowercase();
135                        if e.key.to_lowercase().contains(&q_lower) {
136                            0.8
137                        } else if e.content.to_lowercase().contains(&q_lower) {
138                            0.5
139                        } else {
140                            0.0
141                        }
142                    }
143                    MatchStrategy::Keyword => keyword_score(&q.query_text, &e.key, &e.content),
144                };
145                if score > 0.0 {
146                    Some(MemoryHit {
147                        key: e.key.clone(),
148                        content: e.content.clone(),
149                        commit_id: e.commit_id.clone(),
150                        score,
151                        created_at: e.created_at,
152                    })
153                } else {
154                    None
155                }
156            })
157            .collect();
158
159        scored.sort_by(|a, b| {
160            b.score
161                .partial_cmp(&a.score)
162                .unwrap_or(std::cmp::Ordering::Equal)
163        });
164        scored.truncate(q.max_results);
165        scored
166    }
167}
168
169impl Default for MemoryIndex {
170    fn default() -> Self {
171        Self::new()
172    }
173}
174
/// Simple keyword overlap scorer: the fraction of whitespace-separated query
/// words that appear (case-insensitively) in the concatenated key + content.
/// An empty query scores 0.0.
fn keyword_score(query: &str, key: &str, content: &str) -> f64 {
    let haystack = format!("{} {}", key, content).to_lowercase();
    let mut total = 0usize;
    let mut matched = 0usize;
    for word in query.split_whitespace() {
        total += 1;
        if haystack.contains(&word.to_lowercase()) {
            matched += 1;
        }
    }
    if total == 0 {
        0.0
    } else {
        matched as f64 / total as f64
    }
}
188
189// ---------------------------------------------------------------------------
190// Decision Rationale — Capture reasoning for major actions
191// ---------------------------------------------------------------------------
192
/// Severity/importance of a decision.
///
/// Variants are declared in ascending order so the derived `Ord` gives
/// `Low < Medium < High < Critical`; importance-threshold filtering relies
/// on this ordering. Do not reorder variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DecisionImportance {
    Low,
    Medium,
    High,
    Critical,
}
202
/// A captured decision rationale tied to a run event.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DecisionRationale {
    /// Identifier for this decision.
    pub decision_id: String,
    /// Run this decision belongs to.
    pub run_id: String,
    /// Sequence number of the run event the decision is tied to.
    pub event_seq: u64,
    /// The action that was decided upon.
    pub action: String,
    /// Free-form reasoning behind the decision.
    pub reasoning: String,
    /// Alternatives that were considered.
    pub alternatives_considered: Vec<String>,
    /// Importance level; used for threshold filtering of planning context.
    pub importance: DecisionImportance,
    /// Observed outcome, if recorded.
    pub outcome: Option<String>,
    /// When the rationale was recorded.
    pub recorded_at: DateTime<Utc>,
}
216
/// Ledger for decision rationales within a run.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RationaleLedger {
    // Append-only list, kept in insertion order.
    entries: Vec<DecisionRationale>,
}
222
223impl RationaleLedger {
224    pub fn new() -> Self {
225        Self {
226            entries: Vec::new(),
227        }
228    }
229
230    /// Record a decision.
231    pub fn record(&mut self, rationale: DecisionRationale) {
232        self.entries.push(rationale);
233    }
234
235    /// Query rationales for a specific run.
236    pub fn for_run(&self, run_id: &str) -> Vec<&DecisionRationale> {
237        self.entries.iter().filter(|r| r.run_id == run_id).collect()
238    }
239
240    /// Query rationales by action substring.
241    pub fn for_action(&self, action_pattern: &str) -> Vec<&DecisionRationale> {
242        let pattern = action_pattern.to_lowercase();
243        self.entries
244            .iter()
245            .filter(|r| r.action.to_lowercase().contains(&pattern))
246            .collect()
247    }
248
249    /// Get high-importance decisions (for planning context).
250    pub fn important_decisions(
251        &self,
252        min_importance: DecisionImportance,
253    ) -> Vec<&DecisionRationale> {
254        self.entries
255            .iter()
256            .filter(|r| r.importance >= min_importance)
257            .collect()
258    }
259
260    /// Total recorded decisions.
261    pub fn len(&self) -> usize {
262        self.entries.len()
263    }
264
265    pub fn is_empty(&self) -> bool {
266        self.entries.is_empty()
267    }
268
269    /// All entries (for serialization / persistence).
270    pub fn entries(&self) -> &[DecisionRationale] {
271        &self.entries
272    }
273}
274
275// ---------------------------------------------------------------------------
276// Context Assembly — Token-budgeted context packing
277// ---------------------------------------------------------------------------
278
/// A context segment with estimated token cost.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ContextSegment {
    /// Heading label used when rendering (`## <label>`).
    pub label: String,
    /// Segment body text.
    pub content: String,
    /// Packing priority; higher values are packed first.
    pub priority: u32,
    /// Token cost (~4 chars per token when auto-estimated).
    pub estimated_tokens: usize,
}
287
288impl ContextSegment {
289    /// Create a segment with auto-estimated token count (~4 chars per token).
290    pub fn new(label: impl Into<String>, content: impl Into<String>, priority: u32) -> Self {
291        let content = content.into();
292        let estimated_tokens = estimate_tokens(&content);
293        Self {
294            label: label.into(),
295            content,
296            priority,
297            estimated_tokens,
298        }
299    }
300
301    /// Create a segment with explicit token count.
302    pub fn with_tokens(
303        label: impl Into<String>,
304        content: impl Into<String>,
305        priority: u32,
306        tokens: usize,
307    ) -> Self {
308        Self {
309            label: label.into(),
310            content: content.into(),
311            priority,
312            estimated_tokens: tokens,
313        }
314    }
315}
316
/// Assembled context window.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AssembledContext {
    /// Segments that fit the budget, ordered highest priority first.
    pub segments: Vec<ContextSegment>,
    /// Sum of estimated tokens across included segments.
    pub total_tokens: usize,
    /// Token budget the assembly was packed against.
    pub budget: usize,
    /// Number of segments dropped for exceeding the remaining budget.
    pub dropped_count: usize,
}
325
326impl AssembledContext {
327    /// Render all segments into a single string.
328    pub fn render(&self) -> String {
329        self.segments
330            .iter()
331            .map(|s| format!("## {}\n{}", s.label, s.content))
332            .collect::<Vec<_>>()
333            .join("\n\n")
334    }
335}
336
/// Assembles context segments within a token budget.
#[derive(Debug, Clone)]
pub struct ContextAssembler {
    /// Maximum total estimated tokens allowed in an assembled context.
    pub token_budget: usize,
}
342
343impl ContextAssembler {
344    pub fn new(token_budget: usize) -> Self {
345        Self { token_budget }
346    }
347
348    /// Pack segments by priority (highest first) within the token budget.
349    pub fn assemble(&self, mut segments: Vec<ContextSegment>) -> AssembledContext {
350        // Sort by priority descending (highest priority first).
351        segments.sort_by(|a, b| b.priority.cmp(&a.priority));
352
353        let mut included = Vec::new();
354        let mut total_tokens = 0usize;
355        let mut dropped_count = 0usize;
356
357        for seg in segments {
358            if total_tokens + seg.estimated_tokens <= self.token_budget {
359                total_tokens += seg.estimated_tokens;
360                included.push(seg);
361            } else {
362                dropped_count += 1;
363            }
364        }
365
366        AssembledContext {
367            segments: included,
368            total_tokens,
369            budget: self.token_budget,
370            dropped_count,
371        }
372    }
373}
374
/// Estimate token count from text (~4 chars per token heuristic).
/// Rounds up, so any non-empty text costs at least one token.
pub fn estimate_tokens(text: &str) -> usize {
    match text.len() {
        0 => 0,
        n => (n - 1) / 4 + 1,
    }
}
379
380// ---------------------------------------------------------------------------
381// Compaction Policy — Retention and compaction for memory stores
382// ---------------------------------------------------------------------------
383
/// Strategy for compacting old memories.
///
/// NOTE(review): `CompactionPolicy::compact` in this file keys off the
/// policy's `Option` fields rather than this enum, and `Summarize` has no
/// implementation here — confirm whether it is handled elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompactionStrategy {
    /// Delete memories older than the threshold.
    DeleteOld,
    /// Keep only the most recent N per key.
    KeepRecentPerKey,
    /// Merge old entries into summary records.
    Summarize,
}
395
/// Policy for memory retention and compaction.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CompactionPolicy {
    /// Drop entries older than this many days (when set).
    pub max_age_days: Option<u64>,
    /// Keep at most this many newest entries per key (when set).
    pub max_entries_per_key: Option<usize>,
    /// Declared compaction strategy for this policy.
    pub strategy: CompactionStrategy,
}
403
404impl CompactionPolicy {
405    pub fn delete_older_than(days: u64) -> Self {
406        Self {
407            max_age_days: Some(days),
408            max_entries_per_key: None,
409            strategy: CompactionStrategy::DeleteOld,
410        }
411    }
412
413    pub fn keep_recent(per_key: usize) -> Self {
414        Self {
415            max_age_days: None,
416            max_entries_per_key: Some(per_key),
417            strategy: CompactionStrategy::KeepRecentPerKey,
418        }
419    }
420
421    /// Apply compaction to a list of memory entries, returning retained entries
422    /// and the count of compacted entries.
423    pub fn compact(&self, entries: &[MemoryEntry]) -> CompactionResult {
424        let now = Utc::now();
425        let mut retained: Vec<MemoryEntry> = Vec::new();
426        let mut compacted = 0usize;
427
428        // Phase 1: age-based filtering
429        let after_age: Vec<&MemoryEntry> = entries
430            .iter()
431            .filter(|e| {
432                if let Some(max_days) = self.max_age_days {
433                    let cutoff = now - Duration::days(max_days as i64);
434                    if e.created_at < cutoff {
435                        return false;
436                    }
437                }
438                true
439            })
440            .collect();
441
442        let age_compacted = entries.len() - after_age.len();
443        compacted += age_compacted;
444
445        // Phase 2: per-key limit
446        match self.max_entries_per_key {
447            Some(max_per_key) => {
448                use std::collections::HashMap;
449                let mut by_key: HashMap<&str, Vec<&MemoryEntry>> = HashMap::new();
450                for e in &after_age {
451                    by_key.entry(e.key.as_str()).or_default().push(e);
452                }
453                for (_key, mut group) in by_key {
454                    // Sort newest first.
455                    group.sort_by(|a, b| b.created_at.cmp(&a.created_at));
456                    for (i, e) in group.into_iter().enumerate() {
457                        if i < max_per_key {
458                            retained.push(e.clone());
459                        } else {
460                            compacted += 1;
461                        }
462                    }
463                }
464            }
465            None => {
466                retained = after_age.into_iter().cloned().collect();
467            }
468        }
469
470        // Restore chronological order.
471        retained.sort_by(|a, b| a.created_at.cmp(&b.created_at));
472
473        CompactionResult {
474            retained,
475            compacted_count: compacted,
476        }
477    }
478}
479
/// Result of a compaction operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionResult {
    /// Entries that survived compaction, in chronological order.
    pub retained: Vec<MemoryEntry>,
    /// Number of entries removed by compaction.
    pub compacted_count: usize,
}
486
487// ---------------------------------------------------------------------------
488// Memory Context Artifact — Auditable persistence
489// ---------------------------------------------------------------------------
490
/// Auditable artifact for memory context state.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MemoryContextArtifact {
    /// Run this artifact describes.
    pub run_id: String,
    /// Number of entries in the memory index at capture time.
    pub index_size: usize,
    /// Number of recorded decision rationales.
    pub rationale_count: usize,
    /// Tokens consumed by the assembled context.
    pub context_tokens_used: usize,
    /// Token budget the context was assembled against.
    pub context_budget: usize,
    /// Whether a compaction pass was applied.
    pub compaction_applied: bool,
    /// When the artifact was created.
    pub created_at: DateTime<Utc>,
}
502
503/// Persist a memory context artifact with digest verification.
504pub fn write_memory_context_artifact(
505    artifact: &MemoryContextArtifact,
506    dir: &Path,
507) -> Result<PathBuf> {
508    let run_dir = dir.join(&artifact.run_id);
509    std::fs::create_dir_all(&run_dir)?;
510
511    let path = run_dir.join("memory_context.json");
512    let digest_path = run_dir.join("memory_context.digest");
513    let json = serde_json::to_vec_pretty(artifact)?;
514    let digest = ContentDigest::from_bytes(&json).as_str().to_string();
515
516    std::fs::write(&path, &json)?;
517    std::fs::write(&digest_path, digest.as_bytes())?;
518
519    Ok(path)
520}
521
522/// Read and verify a memory context artifact.
523pub fn read_memory_context_artifact(run_id: &str, dir: &Path) -> Result<MemoryContextArtifact> {
524    let run_dir = dir.join(run_id);
525    let path = run_dir.join("memory_context.json");
526    let digest_path = run_dir.join("memory_context.digest");
527
528    let json = std::fs::read(&path)?;
529    let digest = std::fs::read_to_string(&digest_path)?;
530    let actual = ContentDigest::from_bytes(&json).as_str().to_string();
531    if digest.trim() != actual {
532        return Err(AivcsError::DigestMismatch {
533            expected: digest.trim().to_string(),
534            actual,
535        });
536    }
537    let artifact: MemoryContextArtifact = serde_json::from_slice(&json)?;
538    Ok(artifact)
539}