Skip to main content

innate_core/
refine.rs

1use crate::errors::Result;
2use crate::utils::{sanitize, SanitizeAction};
3use serde_json::Value;
4
5// ---------------------------------------------------------------------------
6// Sanitizer — injectable content sanitizer (§二·六)
7// ---------------------------------------------------------------------------
8
9/// Replaceable sanitizer. Inject via `KnowledgeBase::open_with`.
10/// Default: `DefaultSanitizer` (wraps built-in heuristics).
11pub trait Sanitizer: Send + Sync {
12    fn sanitize(&self, content: &str) -> (String, SanitizeAction);
13}
14
15/// Built-in sanitizer — wraps `utils::sanitize()`.
16pub struct DefaultSanitizer;
17
18impl Sanitizer for DefaultSanitizer {
19    fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
20        sanitize(content)
21    }
22}
23
24/// No-op sanitizer — passes content through unchanged (use to disable sanitization).
25pub struct NoopSanitizer;
26
27impl Sanitizer for NoopSanitizer {
28    fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
29        (content.to_string(), SanitizeAction::Allow)
30    }
31}
32
33// ---------------------------------------------------------------------------
34// Refiner — online trim / adapt
35// ---------------------------------------------------------------------------
36
37/// Online refiner — trims or adapts recalled chunks.
38pub trait Refiner: Send + Sync {
39    fn refine(&self, chunks: Vec<Value>, budget_tokens: Option<usize>) -> Result<Vec<Value>>;
40
41    /// Trim a block to fit within `budget_tokens` given the active `query`.
42    /// Returns `None` if trimming is not supported or the block cannot be trimmed while
43    /// preserving hard-dep closure integrity.
44    fn trim(&self, _block: &[Value], _query: &str, _budget_tokens: usize) -> Option<Vec<Value>> {
45        None
46    }
47}
48
49/// No-op refiner (default): returns chunks unchanged, trim is unsupported.
50pub struct NullRefiner;
51
52impl Refiner for NullRefiner {
53    fn refine(&self, chunks: Vec<Value>, _budget: Option<usize>) -> Result<Vec<Value>> {
54        Ok(chunks)
55    }
56}
57
58/// Distiller — episodic log → new pending chunks.
59pub trait Distiller: Send + Sync {
60    fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>>;
61}
62
/// A candidate chunk produced by a [`Distiller`] from one log entry.
#[derive(Debug, Clone)]
pub struct DistilledChunk {
    /// The chunk body text.
    pub content: String,
    /// Optional short description used as a retrieval signal for the chunk.
    pub trigger_desc: Option<String>,
    /// Optional counterpart to `trigger_desc`.
    /// NOTE(review): presumably describes when the chunk should *not* be
    /// recalled — confirm against the consumer.
    pub anti_trigger_desc: Option<String>,
    /// Identifier of the log entry this chunk was distilled from.
    pub source_log_id: String,
    /// The log entry's nomination text, when it had one.
    pub nomination: Option<String>,
}
71
72/// Heuristic distiller: extracts chunks from log output / nomination fields.
73pub struct HeuristicDistiller;
74
75impl Distiller for HeuristicDistiller {
76    fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>> {
77        let mut out = Vec::new();
78        for entry in log_entries {
79            let id = entry["id"].as_str().unwrap_or("").to_string();
80            // Use nomination text if present, else output_summary, else skip.
81            let text = entry["nomination"]
82                .as_str()
83                .or_else(|| entry["output_summary"].as_str());
84            if let Some(t) = text {
85                let t = t.trim();
86                if !t.is_empty() {
87                    // trigger_desc: prefer the recall query (it caused this log), else first
88                    // non-trivial line of content — gives the distilled chunk a useful retrieval signal.
89                    let trigger_desc = entry["query"]
90                        .as_str()
91                        .map(|q| q.trim().chars().take(80).collect::<String>())
92                        .filter(|q| !q.is_empty())
93                        .or_else(|| {
94                            t.lines()
95                                .map(str::trim)
96                                .find(|l| l.len() > 10)
97                                .map(|l| l.chars().take(80).collect())
98                        });
99                    out.push(DistilledChunk {
100                        content: t.to_string(),
101                        trigger_desc,
102                        anti_trigger_desc: None,
103                        source_log_id: id,
104                        nomination: entry["nomination"].as_str().map(str::to_string),
105                    });
106                }
107            }
108        }
109        Ok(out)
110    }
111}