Skip to main content

oxios_kernel/
persistence_hook.rs

1//! PersistenceHook — autonomous persistence after agent execution.
2//!
3//! Two-layer evaluation:
4//! 1. Heuristic: detect markdown documents → auto-save to knowledge (no LLM call)
5//! 2. LLM Reflection: extract facts/preferences → memory, detect missed knowledge saves
6
7use std::sync::Arc;
8
9use anyhow::Result;
10use serde::{Deserialize, Serialize};
11
12use crate::engine::EngineHandle;
13use crate::event_bus::{EventBus, KernelEvent};
14use crate::memory::{MemoryEntry, MemoryManager, MemoryType, content_hash};
15use crate::state_store::StateStore;
16use oxios_markdown::KnowledgeBase;
17use oxios_markdown::types::{NoteMeta, NoteQuality, NoteSource};
18use oxios_memory::memory::sona::TrajectoryStep;
19use oxios_ouroboros::Seed;
20
21/// A planned write to the knowledge vault.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct KnowledgeWrite {
24    /// Path within the knowledge base (e.g. "notes/rust-design.md").
25    pub path: String,
26    /// Markdown content to write.
27    pub content: String,
28    /// Provenance metadata (RFC-022).
29    #[serde(default = "default_knowledge_meta")]
30    pub meta: NoteMeta,
31}
32
33fn default_knowledge_meta() -> NoteMeta {
34    NoteMeta {
35        author: "agent".to_string(),
36        source: NoteSource::Hook,
37        quality: NoteQuality::Raw,
38        needs_review: true,
39        session_id: None,
40        message_index: None,
41        saved_at: None,
42    }
43}
44
45/// A planned write to agent memory.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct MemoryWrite {
48    /// Memory content.
49    pub content: String,
50    /// Memory type: "fact" or "episode".
51    #[serde(rename = "type")]
52    pub memory_type: String,
53    /// Importance score 0.0–1.0.
54    pub importance: f32,
55    /// Optional tags.
56    #[serde(default)]
57    pub tags: Vec<String>,
58}
59
60/// Result of evaluating an execution for persistence.
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct PersistencePlan {
63    /// Memory entries to persist.
64    pub memory: Vec<MemoryWrite>,
65    /// Knowledge notes to persist.
66    pub knowledge: Vec<KnowledgeWrite>,
67}
68
69/// Knowledge save record for a session message.
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct KnowledgeSaveRecord {
72    /// Index of the message in the session.
73    pub message_index: usize,
74    /// Path within the knowledge base.
75    pub knowledge_path: String,
76    /// ISO 8601 timestamp.
77    pub saved_at: String,
78    /// How the save was triggered: "hook", "user", "tool".
79    pub source: String,
80}
81
82/// Autonomous persistence hook.
83///
84/// Evaluates agent output after execution and decides what to persist
85/// to memory and/or knowledge, using heuristic rules first and an
86/// optional LLM reflection pass for ambiguous cases.
87pub struct PersistenceHook {
88    memory_manager: Arc<MemoryManager>,
89    knowledge_base: Arc<KnowledgeBase>,
90    engine_handle: Arc<EngineHandle>,
91    state_store: Arc<StateStore>,
92    event_bus: EventBus,
93}
94
95impl PersistenceHook {
96    /// Create a new persistence hook.
97    ///
98    /// The reflection model is resolved live from `engine_handle` on each
99    /// reflection, so it tracks hot-swaps — same source of truth as interview
100    /// and execute.
101    pub fn new(
102        memory_manager: Arc<MemoryManager>,
103        knowledge_base: Arc<KnowledgeBase>,
104        engine_handle: Arc<EngineHandle>,
105        state_store: Arc<StateStore>,
106        event_bus: EventBus,
107    ) -> Self {
108        Self {
109            memory_manager,
110            knowledge_base,
111            engine_handle,
112            state_store,
113            event_bus,
114        }
115    }
116
117    /// Evaluate an execution and produce a persistence plan.
118    ///
119    /// `already_saved_knowledge` = true if tool-calling already did a knowledge write.
120    pub async fn evaluate(
121        &self,
122        seed: &Seed,
123        trajectory: &[TrajectoryStep],
124        output: &str,
125        already_saved_knowledge: bool,
126    ) -> Result<PersistencePlan> {
127        let mut plan = PersistencePlan {
128            memory: Vec::new(),
129            knowledge: Vec::new(),
130        };
131
132        // Layer 1: Heuristic — detect markdown documents
133        if !already_saved_knowledge && looks_like_document(output) {
134            let path = auto_save_path(seed, output);
135            let now = chrono::Utc::now().to_rfc3339();
136            plan.knowledge.push(KnowledgeWrite {
137                path,
138                content: output.to_string(),
139                meta: NoteMeta {
140                    author: "agent".to_string(),
141                    source: NoteSource::Hook,
142                    quality: NoteQuality::Raw,
143                    needs_review: true,
144                    session_id: None,
145                    message_index: None,
146                    saved_at: Some(now),
147                },
148            });
149        }
150
151        // Layer 2: LLM Reflection
152        let knowledge_already_handled = !plan.knowledge.is_empty();
153        let reflection_plan = self
154            .reflect(seed, trajectory, output, knowledge_already_handled)
155            .await;
156        match reflection_plan {
157            Ok(rp) => {
158                plan.memory.extend(rp.memory);
159                if !already_saved_knowledge {
160                    plan.knowledge.extend(rp.knowledge);
161                }
162            }
163            Err(e) => {
164                tracing::warn!(error = %e, "PersistenceHook reflection failed");
165            }
166        }
167
168        Ok(plan)
169    }
170
171    /// Execute a persistence plan (fire-and-forget style, but still awaits I/O).
172    pub async fn execute_plan(
173        &self,
174        mut plan: PersistencePlan,
175        session_id: &str,
176        message_index: usize,
177    ) {
178        // Memory writes
179        for mw in &plan.memory {
180            let memory_type = match mw.memory_type.as_str() {
181                "episode" => MemoryType::Episode,
182                _ => MemoryType::Fact,
183            };
184            let now = chrono::Utc::now();
185            let entry = MemoryEntry {
186                id: uuid::Uuid::new_v4().to_string(),
187                memory_type,
188                tier: memory_type.initial_tier(),
189                content: mw.content.clone(),
190                content_hash: content_hash(&mw.content),
191                tags: mw.tags.clone(),
192                source: "persistence-hook".to_string(),
193                session_id: Some(session_id.to_string()),
194                importance: mw.importance.clamp(0.0, 1.0),
195                pinned: false,
196                protection: crate::memory::ProtectionLevel::None,
197                auto_classified: true,
198                session_appearances: 0,
199                user_corrected: false,
200                seen_in_sessions: vec![],
201                created_at: now,
202                accessed_at: now,
203                modified_at: now,
204                access_count: 0,
205                decay_score: 1.0,
206                compaction_level: 0,
207                compacted_from: vec![],
208                related_ids: vec![],
209                contradicts: None,
210            };
211            match self.memory_manager.remember(entry).await {
212                Ok(_id) => tracing::debug!(session = session_id, "Hook saved memory entry"),
213                Err(e) => tracing::warn!(error = %e, "Hook failed to save memory"),
214            }
215        }
216
217        // Knowledge writes
218        let now_iso = chrono::Utc::now().to_rfc3339();
219        for kw in &mut plan.knowledge {
220            // Backfill session context into meta (reflection path leaves these empty)
221            if kw.meta.session_id.is_none() {
222                kw.meta.session_id = Some(session_id.to_string());
223            }
224            if kw.meta.message_index.is_none() {
225                kw.meta.message_index = Some(message_index);
226            }
227            if kw.meta.saved_at.is_none() {
228                kw.meta.saved_at = Some(now_iso.clone());
229            }
230        }
231        for kw in &plan.knowledge {
232            match self
233                .knowledge_base
234                .note_write_with_meta(&kw.path, &kw.content, &kw.meta)
235            {
236                Ok(true) => {
237                    tracing::info!(
238                        path = %kw.path,
239                        session = session_id,
240                        "Hook saved knowledge note"
241                    );
242                    // Record the save mapping
243                    let record = KnowledgeSaveRecord {
244                        message_index,
245                        knowledge_path: kw.path.clone(),
246                        saved_at: chrono::Utc::now().to_rfc3339(),
247                        source: "hook".to_string(),
248                    };
249                    self.record_save(session_id, &record).await;
250                    // Publish event
251                    let _ = self.event_bus.publish(KernelEvent::KnowledgePersisted {
252                        session_id: session_id.to_string(),
253                        message_index,
254                        path: kw.path.clone(),
255                        source: "hook".to_string(),
256                    });
257                }
258                Ok(false) => {
259                    tracing::warn!(
260                        path = %kw.path,
261                        "Hook skipped knowledge save: path is a user-authored note"
262                    );
263                }
264                Err(e) => {
265                    tracing::warn!(error = %e, path = %kw.path, "Hook failed to save knowledge")
266                }
267            }
268        }
269    }
270
271    /// Record a knowledge save to StateStore.
272    async fn record_save(&self, session_id: &str, record: &KnowledgeSaveRecord) {
273        let saves: Vec<KnowledgeSaveRecord> = self
274            .state_store
275            .load_json("knowledge-saves", session_id)
276            .await
277            .ok()
278            .flatten()
279            .unwrap_or_default();
280        // Note: we load, push, save — not append. This is fine for the
281        // low-throughput knowledge-save path. If contention becomes an
282        // issue, switch to append-only log + compaction.
283        let mut saves = saves;
284        saves.push(record.clone());
285        if let Err(e) = self
286            .state_store
287            .save_json("knowledge-saves", session_id, &saves)
288            .await
289        {
290            tracing::warn!(error = %e, "Failed to record knowledge save");
291        }
292    }
293
294    /// LLM reflection — ask the model what to persist.
295    async fn reflect(
296        &self,
297        seed: &Seed,
298        trajectory: &[TrajectoryStep],
299        output: &str,
300        knowledge_already_handled: bool,
301    ) -> Result<PersistencePlan> {
302        let trajectory_summary: Vec<String> = trajectory
303            .iter()
304            .take(20)
305            .map(|s| {
306                let out_preview = if s.output.len() > 100 {
307                    // Char-boundary safe: multibyte UTF-8 (Korean, emoji)
308                    // would panic on a raw byte slice.
309                    let mut end = 100;
310                    while end > 0 && !s.output.is_char_boundary(end) {
311                        end -= 1;
312                    }
313                    format!("{}...", &s.output[..end])
314                } else {
315                    s.output.clone()
316                };
317                format!("- {} → {}", s.input, out_preview)
318            })
319            .collect();
320        let result_snippet = if output.len() > 500 {
321            let mut end = 500;
322            while end > 0 && !output.is_char_boundary(end) {
323                end -= 1;
324            }
325            format!("{}...", &output[..end])
326        } else {
327            output.to_string()
328        };
329
330        let knowledge_section = if knowledge_already_handled {
331            String::new()
332        } else {
333            "- Knowledge: documents, research, reference material the user would want later. Visible via Web UI.\n"
334                .to_string()
335        };
336
337        let knowledge_field = if knowledge_already_handled {
338            String::new()
339        } else {
340            ",\"knowledge\":[{\"path\":\"cat/file.md\",\"content\":\"...\"}]".to_string()
341        };
342
343        let prompt = format!(
344            "Review this agent execution. Decide what to persist.\n\n\
345             Goal: {}\n\
346             Request: {}\n\
347             Steps:\n{}\n\
348             Result: {}\n\n\
349             Two stores:\n\
350             - Memory: facts about the user, preference corrections, project context. Not visible to the user. Agent's own learning.\n\
351             {knowledge_section}\
352             \n\
353             When saving to knowledge, strip conversational wrapping: greetings, sign-offs, questions to the user, hedging. Extract only substantive content.\n\
354             JSON only:\n\
355             {{\"memory\":[{{\"content\":\"...\",\"type\":\"fact|episode\",\"importance\":0.0-1.0}}]{knowledge_field}}}",
356            seed.goal,
357            seed.original_request,
358            trajectory_summary.join("\n"),
359            result_snippet,
360        );
361
362        // Build a lightweight agent via EngineHandle → Oxi → AgentBuilder
363        let engine = self.engine_handle.get();
364        let agent_config = oxi_sdk::AgentConfig {
365            description: Some("Persistence reflection".into()),
366            model_id: engine.default_model_id().to_string(),
367            system_prompt: Some("You output JSON only. No explanation.".to_string()),
368            max_tokens: Some(512),
369            temperature: Some(0.3),
370            ..Default::default()
371        };
372
373        let agent = engine.oxi().agent(agent_config).build()?;
374
375        let (response, _events) = agent.run(prompt).await?;
376
377        // Parse JSON from response
378        let json_str = response.content.trim();
379        // Strip markdown code fences if present
380        let json_str = json_str
381            .strip_prefix("```json\n")
382            .or_else(|| json_str.strip_prefix("```\n"))
383            .unwrap_or(json_str);
384        let json_str = json_str.strip_suffix("```").unwrap_or(json_str);
385
386        let plan: PersistencePlan = serde_json::from_str(json_str.trim())?;
387        Ok(plan)
388    }
389}
390
/// Check if content looks like a structured markdown document.
///
/// Returns `true` only when all of the following hold:
/// - the content is at least 300 bytes long (bytes, not characters);
/// - some line is an ATX heading: 1–6 `#` at the start of the line followed
///   by a space — the same line-start rule `auto_save_path` uses to find a
///   heading;
/// - some line, ignoring leading indentation, starts a bullet item (`- ` or
///   `* `), a code fence, or a table row (`|`).
///
/// Markers are matched at line starts rather than anywhere in the text, so
/// ordinary prose that merely mentions "C# " or uses " - " as a dash is not
/// mistaken for a document and auto-saved.
fn looks_like_document(content: &str) -> bool {
    // Anything shorter is treated as a chat-sized answer, not a document.
    const MIN_DOCUMENT_BYTES: usize = 300;

    /// True when `line` is an ATX heading: 1–6 `#` at column 0, then a space.
    fn is_heading(line: &str) -> bool {
        let rest = line.trim_start_matches('#');
        let level = line.len() - rest.len();
        (1..=6).contains(&level) && rest.starts_with(' ')
    }

    /// True when `line` opens a bullet item, a code fence, or a table row.
    /// Indentation is ignored so nested lists and fenced blocks inside list
    /// items still count.
    fn is_structural(line: &str) -> bool {
        let line = line.trim_start();
        line.starts_with("- ")
            || line.starts_with("* ")
            || line.starts_with("```")
            || line.starts_with('|')
    }

    if content.len() < MIN_DOCUMENT_BYTES {
        return false;
    }

    let mut has_heading = false;
    let mut has_structure = false;
    for line in content.lines() {
        has_heading = has_heading || is_heading(line);
        has_structure = has_structure || is_structural(line);
        if has_heading && has_structure {
            return true;
        }
    }
    false
}
403
404/// Generate an auto-save path from the seed goal and content.
405fn auto_save_path(seed: &Seed, content: &str) -> String {
406    let date = chrono::Local::now().format("%Y-%m-%d").to_string();
407
408    // Try to extract a meaningful name from the first ## heading
409    let heading = content
410        .lines()
411        .find(|l| l.starts_with("## ") || l.starts_with("# "))
412        .map(|l| l.trim_start_matches('#').trim().to_string())
413        .filter(|h| !h.is_empty())
414        .unwrap_or_else(|| {
415            seed.goal
416                .split_whitespace()
417                .take(5)
418                .collect::<Vec<_>>()
419                .join("-")
420        });
421
422    // Slugify
423    let slug: String = heading
424        .to_lowercase()
425        .chars()
426        .map(|c| {
427            if c.is_alphanumeric() || c == '-' || c == '_' {
428                c
429            } else {
430                '-'
431            }
432        })
433        .collect();
434    let slug = slug
435        .split('-')
436        .filter(|s| !s.is_empty())
437        .collect::<Vec<_>>()
438        .join("-");
439    let slug = if slug.len() > 60 {
440        // Slug is ASCII-only (slugified above), but guard the slice for
441        // safety and consistency with the rest of the codebase.
442        let mut end = 60;
443        while end > 0 && !slug.is_char_boundary(end) {
444            end -= 1;
445        }
446        slug[..end].to_string()
447    } else {
448        slug
449    };
450
451    format!("notes/{slug}-{date}.md")
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal seed whose only meaningful field is `goal`.
    fn seed_with_goal(goal: &str) -> Seed {
        Seed {
            id: uuid::Uuid::new_v4(),
            goal: goal.to_string(),
            constraints: vec![],
            acceptance_criteria: vec![],
            ontology: vec![],
            created_at: chrono::Utc::now(),
            generation: 0,
            parent_seed_id: None,
            cspace_hint: None,
            original_request: String::new(),
            output_schema: None,
            project_id: None,
            workspace_context: None,
            mount_paths: Vec::new(),
        }
    }

    #[test]
    fn test_looks_like_document_short() {
        assert!(!looks_like_document("short text"));
    }

    #[test]
    fn test_looks_like_document_structured() {
        let content = "# Title\n\nSome intro text here that makes this longer than three hundred characters. We need more text to reach the threshold. Adding some more content here. And even more text to be absolutely sure we cross the 300 character limit. Extra padding.\n\n## Section 1\n\n- Item 1\n- Item 2\n\n## Section 2\n\nSome content.";
        assert!(looks_like_document(content));
    }

    #[test]
    fn test_looks_like_document_no_structure() {
        // The fixture must clear the 300-byte minimum; otherwise the length
        // check rejects it and the structure check is never exercised.
        let content = format!(
            "## Title\n\n{}",
            "Just plain prose without any lists or code blocks. ".repeat(8)
        );
        assert!(
            content.len() >= 300,
            "fixture must be long enough to reach the structure check"
        );
        assert!(!looks_like_document(&content));
    }

    #[test]
    fn test_looks_like_document_has_list() {
        let content = "## Title\n\nSome intro text here that makes this longer than three hundred characters. We need more text to reach the threshold. Adding some more content here. And even more text to be absolutely sure we cross the 300 character limit. Extra padding added. More text here too for good measure.\n\n- Item one\n- Item two";
        assert!(looks_like_document(content));
    }

    #[test]
    fn test_auto_save_path() {
        let seed = seed_with_goal("Write a Rust design document");
        let content = "## Rust Ownership Design\n\nContent here...";
        let path = auto_save_path(&seed, content);
        assert!(path.starts_with("notes/"));
        assert!(path.ends_with(".md"));
        assert!(path.contains("rust"));
    }

    #[test]
    fn test_auto_save_path_from_goal() {
        let seed = seed_with_goal("Fetch hacker news");
        let content = "Plain text without headings but we still need a path.";
        let path = auto_save_path(&seed, content);
        assert!(path.starts_with("notes/"));
        assert!(path.contains("fetch"));
    }
}
525        let path = auto_save_path(&seed, content);
526        assert!(path.starts_with("notes/"));
527        assert!(path.contains("fetch"));
528    }
529}