Skip to main content

oxios_kernel/kernel_handle/
knowledge_lens.rs

1//! KnowledgeLens — semantic search overlay for the markdown knowledge base.
2//!
3//! Wraps a [`KnowledgeBase`] and adds HNSW-based semantic vector search
4//! via the agent's [`MemoryManager`]. Provides `recall_for_context()` for
5//! injecting relevant knowledge into agent context windows.
6//!
7//! **RFC-003: Knowledge Base Independent Separation**
8//! - Semantic search lives in the kernel (AI layer), not oxios-markdown
9//! - `KnowledgeLens` subscribes to `KnowledgeBase.on_file_change()` to keep
10//!   the HNSW index in sync automatically
11
12use std::collections::HashSet;
13use std::path::PathBuf;
14use std::sync::Arc;
15
16use anyhow::Result;
17use parking_lot::RwLock;
18use serde::{Deserialize, Serialize};
19use tokio::sync::mpsc;
20
21use crate::memory::{MemoryEntry, MemoryManager, MemoryType};
22
23/// Knowledge context injected into agent prompts.
24#[derive(Debug, Clone, Default, Serialize, Deserialize)]
25pub struct KnowledgeContext {
26    /// Relevant knowledge notes for the query.
27    pub notes: Vec<KnowledgeNote>,
28    /// Memory entries from agent memory.
29    pub memories: Vec<MemoryNote>,
30    /// Number of HNSW index entries used.
31    pub index_entries_used: usize,
32}
33
34/// A knowledge note extracted from the markdown knowledge base.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct KnowledgeNote {
37    /// Relative path.
38    pub path: String,
39    /// Display name.
40    pub name: String,
41    /// Content snippet.
42    pub content: String,
43    /// Number of backlinks.
44    pub backlink_count: usize,
45}
46
47/// A memory entry from the agent's memory system.
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct MemoryNote {
50    /// Memory ID.
51    pub id: String,
52    /// Source tag (e.g. "memory:agent", "session:...").
53    pub source: String,
54    /// Content snippet.
55    pub content: String,
56    /// Importance score (0-1).
57    pub importance: f32,
58}
59
60/// Copilot response (AI-powered chat about the knowledge base).
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct CopilotResponse {
63    /// AI-generated answer.
64    pub content: String,
65    /// Note paths referenced in the response.
66    pub referenced_notes: Vec<String>,
67    /// Memory IDs referenced in the response.
68    pub referenced_memories: Vec<String>,
69}
70
71/// KnowledgeLens — semantic overlay over KnowledgeBase.
72///
73/// Owns the HNSW index (via MemoryManager) and keeps it synchronized
74/// with the markdown knowledge base via file-change callbacks.
75pub struct KnowledgeLens {
76    /// The underlying knowledge base.
77    kb: Arc<oxios_markdown::KnowledgeBase>,
78    /// Agent memory manager (provides HNSW index + keyword search).
79    memory: Arc<MemoryManager>,
80    /// Tracks which files were written by agents.
81    agent_writes: Arc<RwLock<HashSet<String>>>,
82    /// Holds the file-change channel sender. The field itself is never read;
83    /// its sole purpose is to keep the sender alive so the background
84    /// file-watcher task draining the receiver does not exit early. Drop this
85    /// and index sync silently stops. Named with a leading underscore to
86    /// signal "intentionally unused" to maintainers.
87    #[allow(dead_code)]
88    _callback_keepalive: Option<mpsc::Sender<oxios_markdown::knowledge::FileChange>>,
89}
90
91impl std::fmt::Debug for KnowledgeLens {
92    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93        f.debug_struct("KnowledgeLens").finish()
94    }
95}
96
97impl KnowledgeLens {
98    /// Create a new KnowledgeLens wrapping the given knowledge base.
99    ///
100    /// Registers a file-change callback to keep the memory index in sync.
101    pub fn new(
102        kb: Arc<oxios_markdown::KnowledgeBase>,
103        memory: Arc<MemoryManager>,
104    ) -> anyhow::Result<Self> {
105        let (tx, mut rx) = mpsc::channel::<oxios_markdown::knowledge::FileChange>(64);
106        let tx_for_cb = tx.clone();
107        kb.on_file_change(move |_path, event| {
108            let tx = tx.clone();
109            tokio::spawn(async move {
110                let _ = tx.send(event).await;
111            });
112        });
113
114        let lens = Self {
115            kb,
116            memory,
117            agent_writes: Arc::new(RwLock::new(HashSet::new())),
118            _callback_keepalive: Some(tx_for_cb),
119        };
120
121        // Spawn background task to process file-change events
122        let memory = lens.memory.clone();
123        let kb = lens.kb.clone();
124        tokio::spawn(async move {
125            while let Some(event) = rx.recv().await {
126                lens_handle_event(kb.clone(), memory.clone(), event);
127            }
128        });
129
130        Ok(lens)
131    }
132
133    /// Get the root path of the knowledge base.
134    pub fn root(&self) -> PathBuf {
135        self.kb.root()
136    }
137
138    /// Get the underlying knowledge base (read-only access).
139    pub fn knowledge_base(&self) -> &Arc<oxios_markdown::KnowledgeBase> {
140        &self.kb
141    }
142
143    /// Mark a file as having been written by an agent.
144    pub fn mark_agent_write(&self, path: &str) {
145        self.agent_writes.write().insert(path.to_string());
146    }
147
148    /// Check if a file was written by an agent.
149    pub fn is_agent_write(&self, path: &str) -> bool {
150        self.agent_writes.read().contains(path)
151    }
152
153    /// Clear the agent-write marker for a file.
154    pub fn clear_agent_write(&self, path: &str) {
155        self.agent_writes.write().remove(path);
156    }
157
158    /// Recall relevant knowledge for a given context/query.
159    ///
160    /// Combines markdown note search (via KnowledgeBase) with agent memory
161    /// search (via MemoryManager). Returns notes ranked by relevance.
162    pub async fn recall_for_context(&self, query: &str, limit: usize) -> Result<KnowledgeContext> {
163        // Search agent memory for relevant entries
164        let mem_entries = self
165            .memory
166            .search(query, None, limit)
167            .await
168            .unwrap_or_default();
169
170        let memories: Vec<MemoryNote> = mem_entries
171            .iter()
172            .map(|e| MemoryNote {
173                id: e.id.clone(),
174                source: e.source.clone(),
175                content: e.content.chars().take(300).collect(),
176                importance: e.importance,
177            })
178            .collect();
179
180        // Search knowledge notes
181        let note_hits = self.kb.search(query, limit)?;
182
183        let notes: Vec<KnowledgeNote> = note_hits
184            .into_iter()
185            .map(|h| {
186                let content = self
187                    .kb
188                    .note_read(&h.path)
189                    .ok()
190                    .flatten()
191                    .map(|c| c.chars().take(500).collect::<String>())
192                    .unwrap_or_default();
193                KnowledgeNote {
194                    path: h.path,
195                    name: h.name,
196                    content,
197                    backlink_count: h.backlink_count,
198                }
199            })
200            .collect();
201
202        Ok(KnowledgeContext {
203            notes,
204            memories,
205            index_entries_used: mem_entries.len(),
206        })
207    }
208
209    /// Copilot chat — AI-powered question answering about the knowledge base.
210    ///
211    /// This method is async (uses `provider.stream()` which is Send).
212    #[allow(clippy::unused_async)]
213    pub async fn copilot_chat(
214        &self,
215        engine_handle: Arc<crate::engine::EngineHandle>,
216        question: &str,
217        context_path: Option<&str>,
218    ) -> Result<CopilotResponse> {
219        let mut context_parts = Vec::new();
220        let mut referenced_notes = Vec::new();
221
222        // 1. Current file context
223        if let Some(path) = context_path
224            && let Ok(Some(content)) = self.kb.note_read(path)
225        {
226            let snippet: String = content.chars().take(2000).collect();
227            context_parts.push(format!("## Current: {path}\n\n{snippet}"));
228            referenced_notes.push(path.to_string());
229        }
230
231        // 2. Related notes
232        let hits = self.kb.search(question, 5).unwrap_or_default();
233        for hit in &hits {
234            if referenced_notes.contains(&hit.path) {
235                continue;
236            }
237            if let Ok(Some(content)) = self.kb.note_read(&hit.path) {
238                let snippet: String = content.chars().take(500).collect();
239                context_parts.push(format!("## Related: {}\n\n{}", hit.path, snippet));
240                referenced_notes.push(hit.path.clone());
241            }
242        }
243
244        // 3. Memory context
245        let mut referenced_memories = Vec::new();
246        if let Ok(entries) = self.memory.search(question, None, 3).await {
247            for mem in &entries {
248                context_parts.push(format!(
249                    "## Memory [{}]: {}",
250                    mem.memory_type.label(),
251                    mem.content.chars().take(200).collect::<String>()
252                ));
253                referenced_memories.push(mem.id.clone());
254            }
255        }
256
257        // 4. AI call
258        let system_prompt = format!(
259            "You are a knowledge assistant embedded in a markdown note-taking system.\n\
260             Answer questions about the user's notes using ONLY the provided context.\n\n\
261             ## Rules\n\
262             - Only answer based on the context below. If the context doesn't contain\n\
263               the answer, say \"I couldn't find relevant notes on that topic.\"\n\
264             - Cite which notes you're referencing by name.\n\
265             - Be concise — the user is in an editor, not a chat room.\n\n\
266             ## Available Notes\n\n{}",
267            context_parts.join("\n\n")
268        );
269
270        // Resolve the live default model + a cached provider through the same
271        // single source of truth the rest of the kernel uses (interview,
272        // execute, persistence). Honors hot-swaps and the user's configured
273        // provider/key — fixes the old hardcoded anthropic engine bug.
274        let resolved = engine_handle
275            .resolve_default()
276            .map_err(|e| anyhow::anyhow!("Model/provider: {e}"))?;
277
278        let mut ctx = oxi_sdk::Context::new();
279        ctx.set_system_prompt(&system_prompt);
280        ctx.add_message(oxi_sdk::Message::User(oxi_sdk::UserMessage::new(question)));
281
282        let stream = resolved
283            .provider
284            .stream(&resolved.model, &ctx, None)
285            .await
286            .map_err(|e| anyhow::anyhow!("Stream: {e}"))?;
287        let mut text = String::new();
288        use futures::StreamExt;
289        let mut pinned = std::pin::pin!(stream);
290        while let Some(event) = pinned.next().await {
291            match event {
292                oxi_sdk::ProviderEvent::TextDelta { delta, .. } => text.push_str(&delta),
293                oxi_sdk::ProviderEvent::Done { .. } => break,
294                oxi_sdk::ProviderEvent::Error { error, .. } => {
295                    return Err(anyhow::anyhow!("AI: {error:?}"));
296                }
297                _ => {}
298            }
299        }
300
301        Ok(CopilotResponse {
302            content: text,
303            referenced_notes,
304            referenced_memories,
305        })
306    }
307}
308
309// ─── File change event handler ────────────────────────────────────────────────
310
311fn lens_handle_event(
312    kb: Arc<oxios_markdown::KnowledgeBase>,
313    memory: Arc<MemoryManager>,
314    event: oxios_markdown::knowledge::FileChange,
315) {
316    use oxios_markdown::knowledge::FileChange::*;
317    match event {
318        Created(path) | Updated(path) => {
319            if let Ok(Some(content)) = kb.note_read(&path) {
320                index_to_memory(&path, &content, &memory);
321            }
322        }
323        Deleted(path) => {
324            let id = format!("note-{}", path.replace('/', "-").trim_end_matches(".md"));
325            let rt = tokio::runtime::Handle::try_current();
326            if let Ok(handle) = rt {
327                let memory = memory.clone();
328                handle.spawn(async move {
329                    let _ = memory.forget(&id, MemoryType::Knowledge).await;
330                });
331            }
332        }
333        Moved { old, new } => {
334            let id = format!("note-{}", old.replace('/', "-").trim_end_matches(".md"));
335            let rt = tokio::runtime::Handle::try_current();
336            if let Ok(handle) = rt {
337                let memory = memory.clone();
338                let kb = kb.clone();
339                let new_path = new.clone();
340                handle.spawn(async move {
341                    let _ = memory.forget(&id, MemoryType::Knowledge).await;
342                    if let Ok(Some(content)) = kb.note_read(&new_path) {
343                        index_to_memory(&new_path, &content, &memory);
344                    }
345                });
346            }
347        }
348    }
349}
350
351fn index_to_memory(path: &str, content: &str, memory: &Arc<MemoryManager>) {
352    let tags = oxios_markdown::parser::extract_headings(content)
353        .into_iter()
354        .take(5)
355        .collect::<Vec<_>>();
356    let now = chrono::Utc::now();
357    let importance = 0.5_f32.min(0.3 + (tags.len() as f32 * 0.05));
358
359    let entry = MemoryEntry {
360        id: format!("note-{}", path.replace('/', "-").trim_end_matches(".md")),
361        memory_type: MemoryType::Knowledge,
362        tier: crate::memory::MemoryTier::Warm,
363        content: content.to_string(),
364        content_hash: 0,
365        source: "knowledge:lens".to_string(),
366        session_id: None,
367        tags,
368        importance,
369        pinned: false,
370        protection: crate::memory::ProtectionLevel::None,
371        auto_classified: false,
372        session_appearances: 0,
373        user_corrected: false,
374        seen_in_sessions: vec![],
375        created_at: now,
376        accessed_at: now,
377        modified_at: now,
378        access_count: 0,
379        decay_score: 1.0,
380        compaction_level: 0,
381        compacted_from: vec![],
382        related_ids: vec![],
383        contradicts: None,
384    };
385
386    let rt = tokio::runtime::Handle::try_current();
387    if let Ok(handle) = rt {
388        let memory = memory.clone();
389        handle.spawn(async move {
390            let _ = memory.remember(entry).await;
391        });
392    }
393}
394
#[cfg(test)]
mod tests {
    use super::*;

    /// The default context carries no notes, no memories and a zero count.
    #[test]
    fn test_knowledge_context_default() {
        let empty = KnowledgeContext::default();
        assert!(empty.notes.is_empty());
        assert!(empty.memories.is_empty());
        assert_eq!(empty.index_entries_used, 0);
    }

    /// A `KnowledgeNote` survives a JSON round trip.
    #[test]
    fn test_knowledge_note_serialization() {
        let original = KnowledgeNote {
            path: "notes/Rust.md".to_string(),
            name: "Rust".to_string(),
            content: "Rust is a systems language".to_string(),
            backlink_count: 3,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: KnowledgeNote = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.path, "notes/Rust.md");
        assert_eq!(decoded.backlink_count, 3);
    }

    /// A `MemoryNote` survives a JSON round trip (importance within tolerance).
    #[test]
    fn test_memory_note_serialization() {
        let original = MemoryNote {
            id: "mem-123".to_string(),
            source: "session:abc".to_string(),
            content: "User prefers dark mode".to_string(),
            importance: 0.85,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: MemoryNote = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.id, "mem-123");
        assert!((decoded.importance - 0.85).abs() < 0.01);
    }

    /// A `CopilotResponse` survives a JSON round trip.
    #[test]
    fn test_copilot_response_serialization() {
        let original = CopilotResponse {
            content: "The answer is 42".to_string(),
            referenced_notes: vec!["notes/answer.md".to_string()],
            referenced_memories: vec!["mem-1".to_string()],
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: CopilotResponse = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.content, "The answer is 42");
        assert_eq!(decoded.referenced_notes.len(), 1);
        assert_eq!(decoded.referenced_memories.len(), 1);
    }

    /// A populated `KnowledgeContext` survives a JSON round trip.
    #[test]
    fn test_knowledge_context_with_data() {
        let only_note = KnowledgeNote {
            path: "test.md".to_string(),
            name: "Test".to_string(),
            content: "Hello".to_string(),
            backlink_count: 0,
        };
        let original = KnowledgeContext {
            notes: vec![only_note],
            memories: vec![],
            index_entries_used: 42,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: KnowledgeContext = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.notes.len(), 1);
        assert_eq!(decoded.index_entries_used, 42);
    }
}