Skip to main content

oxios_kernel/
knowledge_dream.rs

1//! KnowledgeDream — periodic LLM curation of agent-generated knowledge notes (RFC-022).
2//!
3//! Phase 1: Scan — find notes with `needs_review: true` and `quality: raw`.
4//! Phase 2: Curate — LLM strips conversational artifacts and improves structure.
5//! Phase 3: Write Back — overwrite with curated content, commit to git first.
6//! Phase 4: Report — save dream report.
7
8use std::sync::Arc;
9
10use anyhow::{Context, Result};
11use chrono::Utc;
12use serde::{Deserialize, Serialize};
13
14use crate::engine::EngineHandle;
15use crate::git_layer::GitLayer;
16use oxios_markdown::KnowledgeBase;
17use oxios_markdown::types::{NoteMeta, NoteQuality, NoteSource};
18
19/// Configuration for knowledge dream.
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct KnowledgeDreamConfig {
22    /// Enable/disable knowledge dream.
23    #[serde(default)]
24    pub enabled: bool,
25    /// Minimum raw notes before triggering a dream.
26    #[serde(default = "default_min_raw_notes")]
27    pub min_raw_notes: usize,
28    /// Maximum notes to curate per dream run.
29    #[serde(default = "default_batch_size")]
30    pub batch_size: usize,
31    /// Model ID for curation LLM calls.
32    #[serde(default = "default_model")]
33    pub model: String,
34}
35
/// Serde/`Default` fallback for `KnowledgeDreamConfig::min_raw_notes`.
fn default_min_raw_notes() -> usize {
    const MIN_RAW_NOTES: usize = 3;
    MIN_RAW_NOTES
}
/// Serde/`Default` fallback for `KnowledgeDreamConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 10;
    BATCH_SIZE
}
/// Serde/`Default` fallback for `KnowledgeDreamConfig::model`.
///
/// `"auto"` is the marker `KnowledgeDream::new` compares against to decide
/// that the engine's default model should be used.
fn default_model() -> String {
    String::from("auto")
}
45
46impl Default for KnowledgeDreamConfig {
47    fn default() -> Self {
48        Self {
49            enabled: false,
50            min_raw_notes: default_min_raw_notes(),
51            batch_size: default_batch_size(),
52            model: default_model(),
53        }
54    }
55}
56
57/// Report from a knowledge dream run.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct KnowledgeDreamReport {
60    /// Unique dream ID.
61    pub dream_id: String,
62    /// When the dream started.
63    pub started_at: chrono::DateTime<Utc>,
64    /// When the dream completed.
65    pub completed_at: chrono::DateTime<Utc>,
66    /// Total notes scanned.
67    pub notes_scanned: usize,
68    /// Notes successfully curated.
69    pub notes_curated: usize,
70    /// Notes skipped (errors or no changes needed).
71    pub notes_skipped: usize,
72    /// Error messages.
73    pub errors: Vec<String>,
74    /// Duration in milliseconds.
75    pub duration_ms: u64,
76}
77
78/// A raw note awaiting curation.
79struct RawNote {
80    path: String,
81    meta: NoteMeta,
82    body: String,
83}
84
85/// A curated note ready for write-back.
86struct CuratedNote {
87    path: String,
88    original_meta: NoteMeta,
89    curated_body: String,
90}
91
92/// Knowledge dream process.
93pub struct KnowledgeDream {
94    knowledge_base: Arc<KnowledgeBase>,
95    git_layer: Arc<GitLayer>,
96    engine_handle: Arc<EngineHandle>,
97    model_id: String,
98    config: KnowledgeDreamConfig,
99}
100
101impl KnowledgeDream {
102    /// Create a new knowledge dream process.
103    ///
104    /// When `config.model` is an explicit model (not `"auto"`), it is validated
105    /// against the live engine now — a typo fails at boot with a clear error
106    /// rather than silently at every curation run. Callers should skip
107    /// spawning on `Err` (the dream is a non-critical background feature).
108    pub fn new(
109        knowledge_base: Arc<KnowledgeBase>,
110        git_layer: Arc<GitLayer>,
111        engine_handle: Arc<EngineHandle>,
112        config: KnowledgeDreamConfig,
113    ) -> Result<Self> {
114        let model_id = if config.model == "auto" {
115            // Will use the default model from engine at curation time.
116            String::new()
117        } else {
118            // Fail-fast: resolve the explicit curation model AND its provider so
119            // a malformed id or unconfigured provider surfaces here, not in the
120            // background loop.
121            let engine = engine_handle.get();
122            let model = engine.resolve_model(&config.model).with_context(|| {
123                format!(
124                    "knowledge_dream.model '{}' is not a known model",
125                    config.model
126                )
127            })?;
128            engine.create_provider(&model.provider).with_context(|| {
129                format!(
130                    "knowledge_dream.model '{}' provider '{}' is not configured",
131                    config.model, model.provider
132                )
133            })?;
134            config.model.clone()
135        };
136        Ok(Self {
137            knowledge_base,
138            git_layer,
139            engine_handle,
140            model_id,
141            config,
142        })
143    }
144
145    /// Run the knowledge dream.
146    pub async fn dream(&self) -> KnowledgeDreamReport {
147        let dream_id = uuid::Uuid::new_v4().to_string();
148        let started_at = Utc::now();
149
150        let mut report = KnowledgeDreamReport {
151            dream_id: dream_id.clone(),
152            started_at,
153            completed_at: started_at,
154            notes_scanned: 0,
155            notes_curated: 0,
156            notes_skipped: 0,
157            errors: Vec::new(),
158            duration_ms: 0,
159        };
160
161        // Phase 1: Scan
162        let raw_notes = match self.scan().await {
163            Ok(notes) => notes,
164            Err(e) => {
165                report.errors.push(format!("Scan failed: {e}"));
166                return self.finish_dream(report);
167            }
168        };
169
170        report.notes_scanned = raw_notes.len();
171
172        if raw_notes.len() < self.config.min_raw_notes {
173            report.notes_skipped = raw_notes.len();
174            return self.finish_dream(report);
175        }
176
177        // Phase 2: Curate
178        let curated = match self.curate(&raw_notes).await {
179            Ok(c) => c,
180            Err(e) => {
181                report.errors.push(format!("Curation failed: {e}"));
182                return self.finish_dream(report);
183            }
184        };
185
186        // Phase 3: Write back
187        for note in &curated {
188            // Git commit the original before overwriting
189            if let Err(e) = self.git_layer.commit_file(
190                &note.path,
191                &format!("dream: pre-curation snapshot ({})", dream_id),
192            ) {
193                tracing::warn!(
194                    path = %note.path,
195                    error = %e,
196                    "Failed to git-commit before curation"
197                );
198            }
199
200            let new_meta = NoteMeta {
201                source: NoteSource::Dream,
202                quality: NoteQuality::Curated,
203                needs_review: false,
204                ..note.original_meta.clone()
205            };
206
207            match self.knowledge_base.note_write_with_meta(
208                &note.path,
209                &note.curated_body,
210                &new_meta,
211            ) {
212                Ok(true) => {
213                    tracing::info!(path = %note.path, "Knowledge dream curated note");
214                    report.notes_curated += 1;
215                }
216                Ok(false) => {
217                    tracing::warn!(path = %note.path, "Dream skipped: user-authored note");
218                    report.notes_skipped += 1;
219                }
220                Err(e) => {
221                    tracing::warn!(path = %note.path, error = %e, "Failed to write curated note");
222                    report
223                        .errors
224                        .push(format!("Write failed for {}: {e}", note.path));
225                    report.notes_skipped += 1;
226                }
227            }
228        }
229
230        self.finish_dream(report)
231    }
232
233    /// Finalize timestamps, save report, return.
234    fn finish_dream(&self, mut report: KnowledgeDreamReport) -> KnowledgeDreamReport {
235        report.completed_at = Utc::now();
236        report.duration_ms = (report.completed_at - report.started_at)
237            .num_milliseconds()
238            .max(0) as u64;
239
240        let report_path = self
241            .knowledge_base
242            .root()
243            .join(".oxios")
244            .join("dream_reports")
245            .join(format!("{}.json", report.dream_id));
246        if let Some(parent) = report_path.parent() {
247            let _ = std::fs::create_dir_all(parent);
248        }
249        if let Ok(data) = serde_json::to_string_pretty(&report) {
250            let _ = std::fs::write(&report_path, data);
251        }
252        report
253    }
254
255    /// Spawn the dream as a background task.
256    pub fn spawn(self: &Arc<Self>) {
257        let kd = Arc::clone(self);
258        tokio::spawn(async move {
259            let report = kd.dream().await;
260            if report.notes_curated > 0 || !report.errors.is_empty() {
261                tracing::info!(
262                    dream_id = %report.dream_id,
263                    curated = report.notes_curated,
264                    errors = report.errors.len(),
265                    "Knowledge dream completed"
266                );
267            }
268        });
269    }
270
271    /// Phase 1: Scan for raw notes needing review.
272    async fn scan(&self) -> Result<Vec<RawNote>> {
273        let review_list = self.knowledge_base.notes_needing_review()?;
274
275        let mut notes = Vec::new();
276        for (path, meta) in review_list.into_iter().take(self.config.batch_size) {
277            let content = self.knowledge_base.note_read(&path)?;
278            let body = match content {
279                Some(c) => {
280                    let (_, body) = oxios_markdown::knowledge::parse_note_meta(&c);
281                    body
282                }
283                None => continue,
284            };
285            notes.push(RawNote { path, meta, body });
286        }
287
288        Ok(notes)
289    }
290
291    /// Phase 2: LLM curation of raw notes.
292    async fn curate(&self, notes: &[RawNote]) -> Result<Vec<CuratedNote>> {
293        let mut curated = Vec::new();
294
295        for note in notes {
296            match self.curate_single(&note.body).await {
297                Ok(curated_body) => {
298                    if curated_body.trim() != note.body.trim() {
299                        curated.push(CuratedNote {
300                            path: note.path.clone(),
301                            original_meta: note.meta.clone(),
302                            curated_body,
303                        });
304                    }
305                    // If LLM returned essentially the same content, skip
306                }
307                Err(e) => {
308                    tracing::warn!(
309                        path = %note.path,
310                        error = %e,
311                        "Failed to curate note, skipping"
312                    );
313                }
314            }
315        }
316
317        Ok(curated)
318    }
319
320    /// Curate a single note via LLM.
321    async fn curate_single(&self, body: &str) -> Result<String> {
322        let engine = self.engine_handle.get();
323        let model_id = if self.model_id.is_empty() {
324            engine.default_model_id().to_string()
325        } else {
326            self.model_id.clone()
327        };
328
329        let agent_config = oxi_sdk::AgentConfig {
330            description: Some("Knowledge curation".into()),
331            model_id,
332            system_prompt: Some(
333                "You are a knowledge editor. You refine raw agent-generated notes into \
334                 clean, well-structured knowledge documents.\n\n\
335                 Rules:\n\
336                 - Remove conversational artifacts: greetings, sign-offs, hedging, questions to the user.\n\
337                 - Keep all substantive content: facts, analysis, code, data, explanations.\n\
338                 - Improve structure if needed: add headers, organize sections.\n\
339                 - Preserve the original meaning. Do not add new information.\n\
340                 - Output only the cleaned markdown body. No frontmatter. No explanation."
341                    .to_string(),
342            ),
343            max_tokens: Some(4096),
344            temperature: Some(0.3),
345            ..Default::default()
346        };
347
348        let agent = engine.oxi().agent(agent_config).build()?;
349        let (response, _) = agent.run(body.to_string()).await?;
350        Ok(response.content)
351    }
352}