Skip to main content

oxios_kernel/
persistence_hook.rs

//! PersistenceHook — autonomous persistence after agent execution.
//!
//! Two-layer evaluation:
//! 1. Heuristic: detect markdown documents → auto-save to knowledge (no LLM call)
//! 2. LLM reflection: extract facts/preferences → memory, and detect knowledge saves the heuristic missed
6
7use std::sync::Arc;
8
9use anyhow::Result;
10use serde::{Deserialize, Serialize};
11
12use crate::engine::EngineHandle;
13use crate::event_bus::{EventBus, KernelEvent};
14use crate::memory::{MemoryEntry, MemoryManager, MemoryType, content_hash};
15use crate::state_store::StateStore;
16use oxios_markdown::KnowledgeBase;
17use oxios_markdown::types::{NoteMeta, NoteQuality, NoteSource};
18use oxios_memory::memory::sona::TrajectoryStep;
19use oxios_ouroboros::Seed;
20
21/// A planned write to the knowledge vault.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct KnowledgeWrite {
24    /// Path within the knowledge base (e.g. "notes/rust-design.md").
25    pub path: String,
26    /// Markdown content to write.
27    pub content: String,
28    /// Provenance metadata (RFC-022).
29    #[serde(default = "default_knowledge_meta")]
30    pub meta: NoteMeta,
31}
32
33fn default_knowledge_meta() -> NoteMeta {
34    NoteMeta {
35        author: "agent".to_string(),
36        source: NoteSource::Hook,
37        quality: NoteQuality::Raw,
38        needs_review: true,
39        session_id: None,
40        message_index: None,
41        saved_at: None,
42    }
43}
44
45/// A planned write to agent memory.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct MemoryWrite {
48    /// Memory content.
49    pub content: String,
50    /// Memory type: "fact" or "episode".
51    #[serde(rename = "type")]
52    pub memory_type: String,
53    /// Importance score 0.0–1.0.
54    pub importance: f32,
55    /// Optional tags.
56    #[serde(default)]
57    pub tags: Vec<String>,
58}
59
60/// Result of evaluating an execution for persistence.
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct PersistencePlan {
63    /// Memory entries to persist.
64    pub memory: Vec<MemoryWrite>,
65    /// Knowledge notes to persist.
66    pub knowledge: Vec<KnowledgeWrite>,
67}
68
69/// Knowledge save record for a session message.
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct KnowledgeSaveRecord {
72    /// Index of the message in the session.
73    pub message_index: usize,
74    /// Path within the knowledge base.
75    pub knowledge_path: String,
76    /// ISO 8601 timestamp.
77    pub saved_at: String,
78    /// How the save was triggered: "hook", "user", "tool".
79    pub source: String,
80}
81
82/// Autonomous persistence hook.
83///
84/// Evaluates agent output after execution and decides what to persist
85/// to memory and/or knowledge, using heuristic rules first and an
86/// optional LLM reflection pass for ambiguous cases.
87pub struct PersistenceHook {
88    memory_manager: Arc<MemoryManager>,
89    knowledge_base: Arc<KnowledgeBase>,
90    engine_handle: Arc<EngineHandle>,
91    model_id: String,
92    state_store: Arc<StateStore>,
93    event_bus: EventBus,
94}
95
96impl PersistenceHook {
97    /// Create a new persistence hook.
98    pub fn new(
99        memory_manager: Arc<MemoryManager>,
100        knowledge_base: Arc<KnowledgeBase>,
101        engine_handle: Arc<EngineHandle>,
102        model_id: impl Into<String>,
103        state_store: Arc<StateStore>,
104        event_bus: EventBus,
105    ) -> Self {
106        Self {
107            memory_manager,
108            knowledge_base,
109            engine_handle,
110            model_id: model_id.into(),
111            state_store,
112            event_bus,
113        }
114    }
115
116    /// Evaluate an execution and produce a persistence plan.
117    ///
118    /// `already_saved_knowledge` = true if tool-calling already did a knowledge write.
119    pub async fn evaluate(
120        &self,
121        seed: &Seed,
122        trajectory: &[TrajectoryStep],
123        output: &str,
124        already_saved_knowledge: bool,
125    ) -> Result<PersistencePlan> {
126        let mut plan = PersistencePlan {
127            memory: Vec::new(),
128            knowledge: Vec::new(),
129        };
130
131        // Layer 1: Heuristic — detect markdown documents
132        if !already_saved_knowledge && looks_like_document(output) {
133            let path = auto_save_path(seed, output);
134            let now = chrono::Utc::now().to_rfc3339();
135            plan.knowledge.push(KnowledgeWrite {
136                path,
137                content: output.to_string(),
138                meta: NoteMeta {
139                    author: "agent".to_string(),
140                    source: NoteSource::Hook,
141                    quality: NoteQuality::Raw,
142                    needs_review: true,
143                    session_id: None,
144                    message_index: None,
145                    saved_at: Some(now),
146                },
147            });
148        }
149
150        // Layer 2: LLM Reflection
151        let knowledge_already_handled = !plan.knowledge.is_empty();
152        let reflection_plan = self
153            .reflect(seed, trajectory, output, knowledge_already_handled)
154            .await;
155        match reflection_plan {
156            Ok(rp) => {
157                plan.memory.extend(rp.memory);
158                if !already_saved_knowledge {
159                    plan.knowledge.extend(rp.knowledge);
160                }
161            }
162            Err(e) => {
163                tracing::warn!(error = %e, "PersistenceHook reflection failed");
164            }
165        }
166
167        Ok(plan)
168    }
169
170    /// Execute a persistence plan (fire-and-forget style, but still awaits I/O).
171    pub async fn execute_plan(
172        &self,
173        mut plan: PersistencePlan,
174        session_id: &str,
175        message_index: usize,
176    ) {
177        // Memory writes
178        for mw in &plan.memory {
179            let memory_type = match mw.memory_type.as_str() {
180                "episode" => MemoryType::Episode,
181                _ => MemoryType::Fact,
182            };
183            let now = chrono::Utc::now();
184            let entry = MemoryEntry {
185                id: uuid::Uuid::new_v4().to_string(),
186                memory_type,
187                tier: memory_type.initial_tier(),
188                content: mw.content.clone(),
189                content_hash: content_hash(&mw.content),
190                tags: mw.tags.clone(),
191                source: "persistence-hook".to_string(),
192                session_id: Some(session_id.to_string()),
193                importance: mw.importance.clamp(0.0, 1.0),
194                pinned: false,
195                protection: crate::memory::ProtectionLevel::None,
196                auto_classified: true,
197                session_appearances: 0,
198                user_corrected: false,
199                seen_in_sessions: vec![],
200                created_at: now,
201                accessed_at: now,
202                modified_at: now,
203                access_count: 0,
204                decay_score: 1.0,
205                compaction_level: 0,
206                compacted_from: vec![],
207                related_ids: vec![],
208                contradicts: None,
209            };
210            match self.memory_manager.remember(entry).await {
211                Ok(_id) => tracing::debug!(session = session_id, "Hook saved memory entry"),
212                Err(e) => tracing::warn!(error = %e, "Hook failed to save memory"),
213            }
214        }
215
216        // Knowledge writes
217        let now_iso = chrono::Utc::now().to_rfc3339();
218        for kw in &mut plan.knowledge {
219            // Backfill session context into meta (reflection path leaves these empty)
220            if kw.meta.session_id.is_none() {
221                kw.meta.session_id = Some(session_id.to_string());
222            }
223            if kw.meta.message_index.is_none() {
224                kw.meta.message_index = Some(message_index);
225            }
226            if kw.meta.saved_at.is_none() {
227                kw.meta.saved_at = Some(now_iso.clone());
228            }
229        }
230        for kw in &plan.knowledge {
231            match self
232                .knowledge_base
233                .note_write_with_meta(&kw.path, &kw.content, &kw.meta)
234            {
235                Ok(true) => {
236                    tracing::info!(
237                        path = %kw.path,
238                        session = session_id,
239                        "Hook saved knowledge note"
240                    );
241                    // Record the save mapping
242                    let record = KnowledgeSaveRecord {
243                        message_index,
244                        knowledge_path: kw.path.clone(),
245                        saved_at: chrono::Utc::now().to_rfc3339(),
246                        source: "hook".to_string(),
247                    };
248                    self.record_save(session_id, &record).await;
249                    // Publish event
250                    let _ = self.event_bus.publish(KernelEvent::KnowledgePersisted {
251                        session_id: session_id.to_string(),
252                        message_index,
253                        path: kw.path.clone(),
254                        source: "hook".to_string(),
255                    });
256                }
257                Ok(false) => {
258                    tracing::warn!(
259                        path = %kw.path,
260                        "Hook skipped knowledge save: path is a user-authored note"
261                    );
262                }
263                Err(e) => {
264                    tracing::warn!(error = %e, path = %kw.path, "Hook failed to save knowledge")
265                }
266            }
267        }
268    }
269
270    /// Record a knowledge save to StateStore.
271    async fn record_save(&self, session_id: &str, record: &KnowledgeSaveRecord) {
272        let saves: Vec<KnowledgeSaveRecord> = self
273            .state_store
274            .load_json("knowledge-saves", session_id)
275            .await
276            .ok()
277            .flatten()
278            .unwrap_or_default();
279        // Note: we load, push, save — not append. This is fine for the
280        // low-throughput knowledge-save path. If contention becomes an
281        // issue, switch to append-only log + compaction.
282        let mut saves = saves;
283        saves.push(record.clone());
284        if let Err(e) = self
285            .state_store
286            .save_json("knowledge-saves", session_id, &saves)
287            .await
288        {
289            tracing::warn!(error = %e, "Failed to record knowledge save");
290        }
291    }
292
293    /// LLM reflection — ask the model what to persist.
294    async fn reflect(
295        &self,
296        seed: &Seed,
297        trajectory: &[TrajectoryStep],
298        output: &str,
299        knowledge_already_handled: bool,
300    ) -> Result<PersistencePlan> {
301        let trajectory_summary: Vec<String> = trajectory
302            .iter()
303            .take(20)
304            .map(|s| {
305                let out_preview = if s.output.len() > 100 {
306                    format!("{}...", &s.output[..100])
307                } else {
308                    s.output.clone()
309                };
310                format!("- {} → {}", s.input, out_preview)
311            })
312            .collect();
313
314        let result_snippet = if output.len() > 500 {
315            format!("{}...", &output[..500])
316        } else {
317            output.to_string()
318        };
319
320        let knowledge_section = if knowledge_already_handled {
321            String::new()
322        } else {
323            "- Knowledge: documents, research, reference material the user would want later. Visible via Web UI.\n"
324                .to_string()
325        };
326
327        let knowledge_field = if knowledge_already_handled {
328            String::new()
329        } else {
330            ",\"knowledge\":[{\"path\":\"cat/file.md\",\"content\":\"...\"}]".to_string()
331        };
332
333        let prompt = format!(
334            "Review this agent execution. Decide what to persist.\n\n\
335             Goal: {}\n\
336             Request: {}\n\
337             Steps:\n{}\n\
338             Result: {}\n\n\
339             Two stores:\n\
340             - Memory: facts about the user, preference corrections, project context. Not visible to the user. Agent's own learning.\n\
341             {knowledge_section}\
342             \n\
343             When saving to knowledge, strip conversational wrapping: greetings, sign-offs, questions to the user, hedging. Extract only substantive content.\n\
344             JSON only:\n\
345             {{\"memory\":[{{\"content\":\"...\",\"type\":\"fact|episode\",\"importance\":0.0-1.0}}]{knowledge_field}}}",
346            seed.goal,
347            seed.original_request,
348            trajectory_summary.join("\n"),
349            result_snippet,
350        );
351
352        // Build a lightweight agent via EngineHandle → Oxi → AgentBuilder
353        let engine = self.engine_handle.get();
354        let agent_config = oxi_sdk::AgentConfig {
355            description: Some("Persistence reflection".into()),
356            model_id: self.model_id.clone(),
357            system_prompt: Some("You output JSON only. No explanation.".to_string()),
358            max_tokens: Some(512),
359            temperature: Some(0.3),
360            ..Default::default()
361        };
362
363        let agent = engine.oxi().agent(agent_config).build()?;
364
365        let (response, _events) = agent.run(prompt).await?;
366
367        // Parse JSON from response
368        let json_str = response.content.trim();
369        // Strip markdown code fences if present
370        let json_str = json_str
371            .strip_prefix("```json\n")
372            .or_else(|| json_str.strip_prefix("```\n"))
373            .unwrap_or(json_str);
374        let json_str = json_str.strip_suffix("```").unwrap_or(json_str);
375
376        let plan: PersistencePlan = serde_json::from_str(json_str.trim())?;
377        Ok(plan)
378    }
379}
380
/// Check if content looks like a structured markdown document.
///
/// Returns `true` only when `content` is at least 300 bytes long and, outside
/// of fenced code blocks, has at least one ATX heading line (`#` through
/// `######` followed by a space) plus at least one structural element: a
/// `- ` / `* ` list item, a table row starting with `|`, or a code fence.
///
/// Markers are matched at the start of a line (after leading whitespace), not
/// anywhere in the text. This keeps prose such as "the C# service - it was
/// slow" and `# comment` lines inside a code fence from being mistaken for a
/// document.
fn looks_like_document(content: &str) -> bool {
    /// True when `line` starts with 1–6 `#` characters followed by a space.
    fn is_atx_heading(line: &str) -> bool {
        let rest = line.trim_start_matches('#');
        let level = line.len() - rest.len();
        (1..=6).contains(&level) && rest.starts_with(' ')
    }

    if content.len() < 300 {
        return false;
    }

    let mut in_fence = false;
    let mut has_headers = false;
    let mut has_structure = false;

    for line in content.lines() {
        let trimmed = line.trim_start();

        if trimmed.starts_with("```") {
            // A fence is itself structure; everything between an opening and
            // closing fence is code, not markdown.
            in_fence = !in_fence;
            has_structure = true;
            continue;
        }
        if in_fence {
            continue;
        }

        if is_atx_heading(trimmed) {
            has_headers = true;
        } else if trimmed.starts_with("- ")
            || trimmed.starts_with("* ")
            || trimmed.starts_with('|')
        {
            has_structure = true;
        }

        if has_headers && has_structure {
            return true;
        }
    }

    has_headers && has_structure
}
393
394/// Generate an auto-save path from the seed goal and content.
395fn auto_save_path(seed: &Seed, content: &str) -> String {
396    let date = chrono::Local::now().format("%Y-%m-%d").to_string();
397
398    // Try to extract a meaningful name from the first ## heading
399    let heading = content
400        .lines()
401        .find(|l| l.starts_with("## ") || l.starts_with("# "))
402        .map(|l| l.trim_start_matches('#').trim().to_string())
403        .filter(|h| !h.is_empty())
404        .unwrap_or_else(|| {
405            seed.goal
406                .split_whitespace()
407                .take(5)
408                .collect::<Vec<_>>()
409                .join("-")
410        });
411
412    // Slugify
413    let slug: String = heading
414        .to_lowercase()
415        .chars()
416        .map(|c| {
417            if c.is_alphanumeric() || c == '-' || c == '_' {
418                c
419            } else {
420                '-'
421            }
422        })
423        .collect();
424    let slug = slug
425        .split('-')
426        .filter(|s| !s.is_empty())
427        .collect::<Vec<_>>()
428        .join("-");
429    let slug = if slug.len() > 60 {
430        slug[..60].to_string()
431    } else {
432        slug
433    };
434
435    format!("notes/{slug}-{date}.md")
436}
437
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal `Seed` in which only `goal` carries test data.
    fn seed_with_goal(goal: &str) -> Seed {
        Seed {
            id: uuid::Uuid::new_v4(),
            goal: goal.to_string(),
            constraints: vec![],
            acceptance_criteria: vec![],
            ontology: vec![],
            created_at: chrono::Utc::now(),
            generation: 0,
            parent_seed_id: None,
            cspace_hint: None,
            original_request: String::new(),
            output_schema: None,
            project_id: None,
            workspace_context: None,
            mount_paths: Vec::new(),
        }
    }

    #[test]
    fn test_looks_like_document_short() {
        assert!(!looks_like_document("short text"));
    }

    #[test]
    fn test_looks_like_document_structured() {
        let content = "# Title\n\nSome intro text here that makes this longer than three hundred characters. We need more text to reach the threshold. Adding some more content here. And even more text to be absolutely sure we cross the 300 character limit. Extra padding.\n\n## Section 1\n\n- Item 1\n- Item 2\n\n## Section 2\n\nSome content.";
        assert!(looks_like_document(content));
    }

    #[test]
    fn test_looks_like_document_no_structure() {
        // The content must clear the 300-byte minimum, otherwise the length
        // check rejects it and the structure check is never exercised.
        let content = format!(
            "## Title\n\n{}",
            "Just plain prose without any lists or code blocks. ".repeat(8)
        );
        assert!(content.len() >= 300);
        assert!(!looks_like_document(&content));
    }

    #[test]
    fn test_looks_like_document_has_list() {
        let content = "## Title\n\nSome intro text here that makes this longer than three hundred characters. We need more text to reach the threshold. Adding some more content here. And even more text to be absolutely sure we cross the 300 character limit. Extra padding added. More text here too for good measure.\n\n- Item one\n- Item two";
        assert!(looks_like_document(content));
    }

    #[test]
    fn test_auto_save_path() {
        let seed = seed_with_goal("Write a Rust design document");
        let content = "## Rust Ownership Design\n\nContent here...";
        let path = auto_save_path(&seed, content);
        assert!(path.starts_with("notes/"));
        assert!(path.ends_with(".md"));
        assert!(path.contains("rust"));
    }

    #[test]
    fn test_auto_save_path_from_goal() {
        let seed = seed_with_goal("Fetch hacker news");
        let content = "Plain text without headings but we still need a path.";
        let path = auto_save_path(&seed, content);
        assert!(path.starts_with("notes/"));
        assert!(path.contains("fetch"));
    }
}