Skip to main content

oxios_kernel/
knowledge_dream.rs

1//! KnowledgeDream — periodic LLM curation of agent-generated knowledge notes (RFC-022).
2//!
3//! Phase 1: Scan — find notes with `needs_review: true` and `quality: raw`.
4//! Phase 2: Curate — LLM strips conversational artifacts and improves structure.
5//! Phase 3: Write Back — overwrite with curated content, commit to git first.
6//! Phase 4: Report — save dream report.
7
8use std::sync::Arc;
9
10use anyhow::Result;
11use chrono::Utc;
12use serde::{Deserialize, Serialize};
13
14use crate::engine::EngineHandle;
15use crate::git_layer::GitLayer;
16use oxios_markdown::KnowledgeBase;
17use oxios_markdown::types::{NoteMeta, NoteQuality, NoteSource};
18
19/// Configuration for knowledge dream.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct KnowledgeDreamConfig {
22    /// Enable/disable knowledge dream.
23    #[serde(default)]
24    pub enabled: bool,
25    /// Minimum raw notes before triggering a dream.
26    #[serde(default = "default_min_raw_notes")]
27    pub min_raw_notes: usize,
28    /// Maximum notes to curate per dream run.
29    #[serde(default = "default_batch_size")]
30    pub batch_size: usize,
31    /// Model ID for curation LLM calls.
32    #[serde(default = "default_model")]
33    pub model: String,
34}
35
/// Serde fallback for `KnowledgeDreamConfig::min_raw_notes`.
fn default_min_raw_notes() -> usize {
    const MIN_RAW_NOTES: usize = 3;
    MIN_RAW_NOTES
}
/// Serde fallback for `KnowledgeDreamConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 10;
    BATCH_SIZE
}
/// Serde fallback for `KnowledgeDreamConfig::model`: the `"auto"` marker.
fn default_model() -> String {
    String::from("auto")
}
45
46impl Default for KnowledgeDreamConfig {
47    fn default() -> Self {
48        Self {
49            enabled: false,
50            min_raw_notes: default_min_raw_notes(),
51            batch_size: default_batch_size(),
52            model: default_model(),
53        }
54    }
55}
56
57/// Report from a knowledge dream run.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct KnowledgeDreamReport {
60    /// Unique dream ID.
61    pub dream_id: String,
62    /// When the dream started.
63    pub started_at: chrono::DateTime<Utc>,
64    /// When the dream completed.
65    pub completed_at: chrono::DateTime<Utc>,
66    /// Total notes scanned.
67    pub notes_scanned: usize,
68    /// Notes successfully curated.
69    pub notes_curated: usize,
70    /// Notes skipped (errors or no changes needed).
71    pub notes_skipped: usize,
72    /// Error messages.
73    pub errors: Vec<String>,
74    /// Duration in milliseconds.
75    pub duration_ms: u64,
76}
77
78/// A raw note awaiting curation.
79struct RawNote {
80    path: String,
81    meta: NoteMeta,
82    body: String,
83}
84
85/// A curated note ready for write-back.
86struct CuratedNote {
87    path: String,
88    original_meta: NoteMeta,
89    curated_body: String,
90}
91
92/// Knowledge dream process.
93pub struct KnowledgeDream {
94    knowledge_base: Arc<KnowledgeBase>,
95    git_layer: Arc<GitLayer>,
96    engine_handle: Arc<EngineHandle>,
97    model_id: String,
98    config: KnowledgeDreamConfig,
99}
100
101impl KnowledgeDream {
102    /// Create a new knowledge dream process.
103    pub fn new(
104        knowledge_base: Arc<KnowledgeBase>,
105        git_layer: Arc<GitLayer>,
106        engine_handle: Arc<EngineHandle>,
107        config: KnowledgeDreamConfig,
108    ) -> Self {
109        let model_id = if config.model == "auto" {
110            // Will use the default model from engine
111            String::new()
112        } else {
113            config.model.clone()
114        };
115        Self {
116            knowledge_base,
117            git_layer,
118            engine_handle,
119            model_id,
120            config,
121        }
122    }
123
124    /// Run the knowledge dream.
125    pub async fn dream(&self) -> KnowledgeDreamReport {
126        let dream_id = uuid::Uuid::new_v4().to_string();
127        let started_at = Utc::now();
128
129        let mut report = KnowledgeDreamReport {
130            dream_id: dream_id.clone(),
131            started_at,
132            completed_at: started_at,
133            notes_scanned: 0,
134            notes_curated: 0,
135            notes_skipped: 0,
136            errors: Vec::new(),
137            duration_ms: 0,
138        };
139
140        // Phase 1: Scan
141        let raw_notes = match self.scan().await {
142            Ok(notes) => notes,
143            Err(e) => {
144                report.errors.push(format!("Scan failed: {e}"));
145                return self.finish_dream(report);
146            }
147        };
148
149        report.notes_scanned = raw_notes.len();
150
151        if raw_notes.len() < self.config.min_raw_notes {
152            report.notes_skipped = raw_notes.len();
153            return self.finish_dream(report);
154        }
155
156        // Phase 2: Curate
157        let curated = match self.curate(&raw_notes).await {
158            Ok(c) => c,
159            Err(e) => {
160                report.errors.push(format!("Curation failed: {e}"));
161                return self.finish_dream(report);
162            }
163        };
164
165        // Phase 3: Write back
166        for note in &curated {
167            // Git commit the original before overwriting
168            if let Err(e) = self.git_layer.commit_file(
169                &note.path,
170                &format!("dream: pre-curation snapshot ({})", dream_id),
171            ) {
172                tracing::warn!(
173                    path = %note.path,
174                    error = %e,
175                    "Failed to git-commit before curation"
176                );
177            }
178
179            let new_meta = NoteMeta {
180                source: NoteSource::Dream,
181                quality: NoteQuality::Curated,
182                needs_review: false,
183                ..note.original_meta.clone()
184            };
185
186            match self.knowledge_base.note_write_with_meta(
187                &note.path,
188                &note.curated_body,
189                &new_meta,
190            ) {
191                Ok(true) => {
192                    tracing::info!(path = %note.path, "Knowledge dream curated note");
193                    report.notes_curated += 1;
194                }
195                Ok(false) => {
196                    tracing::warn!(path = %note.path, "Dream skipped: user-authored note");
197                    report.notes_skipped += 1;
198                }
199                Err(e) => {
200                    tracing::warn!(path = %note.path, error = %e, "Failed to write curated note");
201                    report
202                        .errors
203                        .push(format!("Write failed for {}: {e}", note.path));
204                    report.notes_skipped += 1;
205                }
206            }
207        }
208
209        self.finish_dream(report)
210    }
211
212    /// Finalize timestamps, save report, return.
213    fn finish_dream(&self, mut report: KnowledgeDreamReport) -> KnowledgeDreamReport {
214        report.completed_at = Utc::now();
215        report.duration_ms = (report.completed_at - report.started_at)
216            .num_milliseconds()
217            .max(0) as u64;
218
219        let report_path = self
220            .knowledge_base
221            .root()
222            .join(".oxios")
223            .join("dream_reports")
224            .join(format!("{}.json", report.dream_id));
225        if let Some(parent) = report_path.parent() {
226            let _ = std::fs::create_dir_all(parent);
227        }
228        if let Ok(data) = serde_json::to_string_pretty(&report) {
229            let _ = std::fs::write(&report_path, data);
230        }
231        report
232    }
233
234    /// Spawn the dream as a background task.
235    pub fn spawn(self: &Arc<Self>) {
236        let kd = Arc::clone(self);
237        tokio::spawn(async move {
238            let report = kd.dream().await;
239            if report.notes_curated > 0 || !report.errors.is_empty() {
240                tracing::info!(
241                    dream_id = %report.dream_id,
242                    curated = report.notes_curated,
243                    errors = report.errors.len(),
244                    "Knowledge dream completed"
245                );
246            }
247        });
248    }
249
250    /// Phase 1: Scan for raw notes needing review.
251    async fn scan(&self) -> Result<Vec<RawNote>> {
252        let review_list = self.knowledge_base.notes_needing_review()?;
253
254        let mut notes = Vec::new();
255        for (path, meta) in review_list.into_iter().take(self.config.batch_size) {
256            let content = self.knowledge_base.note_read(&path)?;
257            let body = match content {
258                Some(c) => {
259                    let (_, body) = oxios_markdown::knowledge::parse_note_meta(&c);
260                    body
261                }
262                None => continue,
263            };
264            notes.push(RawNote { path, meta, body });
265        }
266
267        Ok(notes)
268    }
269
270    /// Phase 2: LLM curation of raw notes.
271    async fn curate(&self, notes: &[RawNote]) -> Result<Vec<CuratedNote>> {
272        let mut curated = Vec::new();
273
274        for note in notes {
275            match self.curate_single(&note.body).await {
276                Ok(curated_body) => {
277                    if curated_body.trim() != note.body.trim() {
278                        curated.push(CuratedNote {
279                            path: note.path.clone(),
280                            original_meta: note.meta.clone(),
281                            curated_body,
282                        });
283                    }
284                    // If LLM returned essentially the same content, skip
285                }
286                Err(e) => {
287                    tracing::warn!(
288                        path = %note.path,
289                        error = %e,
290                        "Failed to curate note, skipping"
291                    );
292                }
293            }
294        }
295
296        Ok(curated)
297    }
298
299    /// Curate a single note via LLM.
300    async fn curate_single(&self, body: &str) -> Result<String> {
301        let engine = self.engine_handle.get();
302        let model_id = if self.model_id.is_empty() {
303            engine.default_model_id().to_string()
304        } else {
305            self.model_id.clone()
306        };
307
308        let agent_config = oxi_sdk::AgentConfig {
309            description: Some("Knowledge curation".into()),
310            model_id,
311            system_prompt: Some(
312                "You are a knowledge editor. You refine raw agent-generated notes into \
313                 clean, well-structured knowledge documents.\n\n\
314                 Rules:\n\
315                 - Remove conversational artifacts: greetings, sign-offs, hedging, questions to the user.\n\
316                 - Keep all substantive content: facts, analysis, code, data, explanations.\n\
317                 - Improve structure if needed: add headers, organize sections.\n\
318                 - Preserve the original meaning. Do not add new information.\n\
319                 - Output only the cleaned markdown body. No frontmatter. No explanation."
320                    .to_string(),
321            ),
322            max_tokens: Some(4096),
323            temperature: Some(0.3),
324            ..Default::default()
325        };
326
327        let agent = engine.oxi().agent(agent_config).build()?;
328        let (response, _) = agent.run(body.to_string()).await?;
329        Ok(response.content)
330    }
331}