1use std::sync::Arc;
9
10use anyhow::{Context, Result};
11use chrono::Utc;
12use serde::{Deserialize, Serialize};
13
14use crate::engine::EngineHandle;
15use crate::git_layer::GitLayer;
16use oxios_markdown::KnowledgeBase;
17use oxios_markdown::types::{NoteMeta, NoteQuality, NoteSource};
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct KnowledgeDreamConfig {
22 #[serde(default)]
24 pub enabled: bool,
25 #[serde(default = "default_min_raw_notes")]
27 pub min_raw_notes: usize,
28 #[serde(default = "default_batch_size")]
30 pub batch_size: usize,
31 #[serde(default = "default_model")]
33 pub model: String,
34}
35
/// Serde fallback for `KnowledgeDreamConfig::min_raw_notes`.
fn default_min_raw_notes() -> usize {
    const MIN_RAW_NOTES: usize = 3;
    MIN_RAW_NOTES
}
/// Serde fallback for `KnowledgeDreamConfig::batch_size`.
fn default_batch_size() -> usize {
    const BATCH_SIZE: usize = 10;
    BATCH_SIZE
}
/// Serde fallback for `KnowledgeDreamConfig::model`: the `"auto"` marker.
fn default_model() -> String {
    String::from("auto")
}
45
46impl Default for KnowledgeDreamConfig {
47 fn default() -> Self {
48 Self {
49 enabled: false,
50 min_raw_notes: default_min_raw_notes(),
51 batch_size: default_batch_size(),
52 model: default_model(),
53 }
54 }
55}
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct KnowledgeDreamReport {
60 pub dream_id: String,
62 pub started_at: chrono::DateTime<Utc>,
64 pub completed_at: chrono::DateTime<Utc>,
66 pub notes_scanned: usize,
68 pub notes_curated: usize,
70 pub notes_skipped: usize,
72 pub errors: Vec<String>,
74 pub duration_ms: u64,
76}
77
78struct RawNote {
80 path: String,
81 meta: NoteMeta,
82 body: String,
83}
84
85struct CuratedNote {
87 path: String,
88 original_meta: NoteMeta,
89 curated_body: String,
90}
91
92pub struct KnowledgeDream {
94 knowledge_base: Arc<KnowledgeBase>,
95 git_layer: Arc<GitLayer>,
96 engine_handle: Arc<EngineHandle>,
97 model_id: String,
98 config: KnowledgeDreamConfig,
99}
100
101impl KnowledgeDream {
102 pub fn new(
109 knowledge_base: Arc<KnowledgeBase>,
110 git_layer: Arc<GitLayer>,
111 engine_handle: Arc<EngineHandle>,
112 config: KnowledgeDreamConfig,
113 ) -> Result<Self> {
114 let model_id = if config.model == "auto" {
115 String::new()
117 } else {
118 let engine = engine_handle.get();
122 let model = engine.resolve_model(&config.model).with_context(|| {
123 format!(
124 "knowledge_dream.model '{}' is not a known model",
125 config.model
126 )
127 })?;
128 engine.create_provider(&model.provider).with_context(|| {
129 format!(
130 "knowledge_dream.model '{}' provider '{}' is not configured",
131 config.model, model.provider
132 )
133 })?;
134 config.model.clone()
135 };
136 Ok(Self {
137 knowledge_base,
138 git_layer,
139 engine_handle,
140 model_id,
141 config,
142 })
143 }
144
145 pub async fn dream(&self) -> KnowledgeDreamReport {
147 let dream_id = uuid::Uuid::new_v4().to_string();
148 let started_at = Utc::now();
149
150 let mut report = KnowledgeDreamReport {
151 dream_id: dream_id.clone(),
152 started_at,
153 completed_at: started_at,
154 notes_scanned: 0,
155 notes_curated: 0,
156 notes_skipped: 0,
157 errors: Vec::new(),
158 duration_ms: 0,
159 };
160
161 let raw_notes = match self.scan().await {
163 Ok(notes) => notes,
164 Err(e) => {
165 report.errors.push(format!("Scan failed: {e}"));
166 return self.finish_dream(report);
167 }
168 };
169
170 report.notes_scanned = raw_notes.len();
171
172 if raw_notes.len() < self.config.min_raw_notes {
173 report.notes_skipped = raw_notes.len();
174 return self.finish_dream(report);
175 }
176
177 let curated = match self.curate(&raw_notes).await {
179 Ok(c) => c,
180 Err(e) => {
181 report.errors.push(format!("Curation failed: {e}"));
182 return self.finish_dream(report);
183 }
184 };
185
186 for note in &curated {
188 if let Err(e) = self.git_layer.commit_file(
190 ¬e.path,
191 &format!("dream: pre-curation snapshot ({})", dream_id),
192 ) {
193 tracing::warn!(
194 path = %note.path,
195 error = %e,
196 "Failed to git-commit before curation"
197 );
198 }
199
200 let new_meta = NoteMeta {
201 source: NoteSource::Dream,
202 quality: NoteQuality::Curated,
203 needs_review: false,
204 ..note.original_meta.clone()
205 };
206
207 match self.knowledge_base.note_write_with_meta(
208 ¬e.path,
209 ¬e.curated_body,
210 &new_meta,
211 ) {
212 Ok(true) => {
213 tracing::info!(path = %note.path, "Knowledge dream curated note");
214 report.notes_curated += 1;
215 }
216 Ok(false) => {
217 tracing::warn!(path = %note.path, "Dream skipped: user-authored note");
218 report.notes_skipped += 1;
219 }
220 Err(e) => {
221 tracing::warn!(path = %note.path, error = %e, "Failed to write curated note");
222 report
223 .errors
224 .push(format!("Write failed for {}: {e}", note.path));
225 report.notes_skipped += 1;
226 }
227 }
228 }
229
230 self.finish_dream(report)
231 }
232
233 fn finish_dream(&self, mut report: KnowledgeDreamReport) -> KnowledgeDreamReport {
235 report.completed_at = Utc::now();
236 report.duration_ms = (report.completed_at - report.started_at)
237 .num_milliseconds()
238 .max(0) as u64;
239
240 let report_path = self
241 .knowledge_base
242 .root()
243 .join(".oxios")
244 .join("dream_reports")
245 .join(format!("{}.json", report.dream_id));
246 if let Some(parent) = report_path.parent() {
247 let _ = std::fs::create_dir_all(parent);
248 }
249 if let Ok(data) = serde_json::to_string_pretty(&report) {
250 let _ = std::fs::write(&report_path, data);
251 }
252 report
253 }
254
255 pub fn spawn(self: &Arc<Self>) {
257 let kd = Arc::clone(self);
258 tokio::spawn(async move {
259 let report = kd.dream().await;
260 if report.notes_curated > 0 || !report.errors.is_empty() {
261 tracing::info!(
262 dream_id = %report.dream_id,
263 curated = report.notes_curated,
264 errors = report.errors.len(),
265 "Knowledge dream completed"
266 );
267 }
268 });
269 }
270
271 async fn scan(&self) -> Result<Vec<RawNote>> {
273 let review_list = self.knowledge_base.notes_needing_review()?;
274
275 let mut notes = Vec::new();
276 for (path, meta) in review_list.into_iter().take(self.config.batch_size) {
277 let content = self.knowledge_base.note_read(&path)?;
278 let body = match content {
279 Some(c) => {
280 let (_, body) = oxios_markdown::knowledge::parse_note_meta(&c);
281 body
282 }
283 None => continue,
284 };
285 notes.push(RawNote { path, meta, body });
286 }
287
288 Ok(notes)
289 }
290
291 async fn curate(&self, notes: &[RawNote]) -> Result<Vec<CuratedNote>> {
293 let mut curated = Vec::new();
294
295 for note in notes {
296 match self.curate_single(¬e.body).await {
297 Ok(curated_body) => {
298 if curated_body.trim() != note.body.trim() {
299 curated.push(CuratedNote {
300 path: note.path.clone(),
301 original_meta: note.meta.clone(),
302 curated_body,
303 });
304 }
305 }
307 Err(e) => {
308 tracing::warn!(
309 path = %note.path,
310 error = %e,
311 "Failed to curate note, skipping"
312 );
313 }
314 }
315 }
316
317 Ok(curated)
318 }
319
320 async fn curate_single(&self, body: &str) -> Result<String> {
322 let engine = self.engine_handle.get();
323 let model_id = if self.model_id.is_empty() {
324 engine.default_model_id().to_string()
325 } else {
326 self.model_id.clone()
327 };
328
329 let agent_config = oxi_sdk::AgentConfig {
330 description: Some("Knowledge curation".into()),
331 model_id,
332 system_prompt: Some(
333 "You are a knowledge editor. You refine raw agent-generated notes into \
334 clean, well-structured knowledge documents.\n\n\
335 Rules:\n\
336 - Remove conversational artifacts: greetings, sign-offs, hedging, questions to the user.\n\
337 - Keep all substantive content: facts, analysis, code, data, explanations.\n\
338 - Improve structure if needed: add headers, organize sections.\n\
339 - Preserve the original meaning. Do not add new information.\n\
340 - Output only the cleaned markdown body. No frontmatter. No explanation."
341 .to_string(),
342 ),
343 max_tokens: Some(4096),
344 temperature: Some(0.3),
345 ..Default::default()
346 };
347
348 let agent = engine.oxi().agent(agent_config).build()?;
349 let (response, _) = agent.run(body.to_string()).await?;
350 Ok(response.content)
351 }
352}