1use crate::errors::Result;
2use crate::utils::{sanitize, SanitizeAction};
3use serde_json::Value;
4use std::sync::Arc;
5
/// Strategy interface for sanitizing a piece of text.
///
/// Implementors must be `Send + Sync`.
pub trait Sanitizer: Send + Sync {
    /// Processes `content` and returns the resulting text paired with a
    /// `SanitizeAction`.
    ///
    /// NOTE(review): the meaning of each `SanitizeAction` variant is defined in
    /// `crate::utils` and is not visible from this file — confirm there.
    fn sanitize(&self, content: &str) -> (String, SanitizeAction);
}
15
/// Stateless `Sanitizer` whose implementation delegates to the `sanitize`
/// function imported from `crate::utils`.
///
/// Being a field-less marker type it is trivially printable, copyable and
/// default-constructible, so the common traits are derived.
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultSanitizer;
18
impl Sanitizer for DefaultSanitizer {
    /// Forwards `content` to the free function `sanitize` imported from
    /// `crate::utils` and returns its result unchanged.
    fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
        // A bare `sanitize(..)` resolves to the imported free function, not to
        // this trait method, so this is a plain delegation rather than recursion.
        sanitize(content)
    }
}
24
/// Stateless `Sanitizer` whose implementation returns its input unmodified
/// together with `SanitizeAction::Allow`.
///
/// Being a field-less marker type it is trivially printable, copyable and
/// default-constructible, so the common traits are derived.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoopSanitizer;
27
28impl Sanitizer for NoopSanitizer {
29 fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
30 (content.to_string(), SanitizeAction::Allow)
31 }
32}
33
/// Strategy interface for post-processing a list of JSON chunks.
///
/// Implementors must be `Send + Sync`.
pub trait Refiner: Send + Sync {
    /// Consumes `chunks` and returns a (possibly different) list of chunks, or
    /// an error.
    ///
    /// NOTE(review): how `budget_tokens` is honoured, and what `None` means for
    /// it, is left to each implementor — nothing in this file constrains it.
    fn refine(&self, chunks: Vec<Value>, budget_tokens: Option<usize>) -> Result<Vec<Value>>;

    /// Optional hook. The default implementation ignores every argument and
    /// returns `None`.
    ///
    /// NOTE(review): presumably `None` signals "no trimming available" to the
    /// caller — confirm against the call sites.
    fn trim(&self, _block: &[Value], _query: &str, _budget_tokens: usize) -> Option<Vec<Value>> {
        None
    }
}
49
/// Stateless `Refiner` whose `refine` implementation hands the chunks back
/// unchanged.
///
/// Being a field-less marker type it is trivially printable, copyable and
/// default-constructible, so the common traits are derived.
#[derive(Debug, Clone, Copy, Default)]
pub struct NullRefiner;
52
impl Refiner for NullRefiner {
    /// Returns `chunks` untouched; the budget argument is ignored and the call
    /// never fails.
    fn refine(&self, chunks: Vec<Value>, _budget: Option<usize>) -> Result<Vec<Value>> {
        Ok(chunks)
    }
}
58
59pub trait Distiller: Send + Sync {
61 fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>>;
62
63 fn distill_with_context(
64 &self,
65 primary: &Value,
66 _related_logs: &[Value],
67 ) -> Result<Vec<DistilledChunk>> {
68 self.distill(std::slice::from_ref(primary))
69 }
70
71 fn provenance(&self) -> DistillProvenance {
72 DistillProvenance::default()
73 }
74}
75
/// Metadata describing which distiller produced a set of chunks.
///
/// All fields are optional; `Default` yields a value with every field `None`.
/// `PartialEq`/`Eq` are derived so provenance values can be compared directly
/// (e.g. in assertions).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DistillProvenance {
    /// Name of the producing provider (`HeuristicDistiller` reports `"heuristic"`).
    pub provider: Option<String>,
    /// Model identifier, if any (`HeuristicDistiller` reports `None`).
    pub model: Option<String>,
    /// Version label of the prompt/ruleset (`HeuristicDistiller` reports `"3"`).
    pub prompt_version: Option<String>,
}
82
/// One unit of distilled output derived from a log entry.
///
/// `PartialEq`/`Eq` are derived so chunks can be compared directly (e.g. in
/// assertions); `Default` yields empty strings and `None` for every option.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DistilledChunk {
    /// The distilled text itself.
    pub content: String,
    /// Optional short name for the chunk.
    pub skill_name: Option<String>,
    /// Optional description of when the chunk applies.
    pub trigger_desc: Option<String>,
    /// Optional description of when the chunk should not apply.
    pub anti_trigger_desc: Option<String>,
    /// Identifier of the log entry this chunk was derived from.
    pub source_log_id: String,
    /// The source entry's `nomination` value, when it had one.
    pub nomination: Option<String>,
    /// Provider label overriding the distiller's provenance for this chunk;
    /// `ResilientDistiller` sets it to `"heuristic_fallback"` on fallback output.
    pub provider_override: Option<String>,
}
99
/// Stateless `Distiller` that builds chunks from fields already present on each
/// log entry, without calling any external service.
///
/// Being a field-less marker type it is trivially printable, copyable and
/// default-constructible, so the common traits are derived.
#[derive(Debug, Clone, Copy, Default)]
pub struct HeuristicDistiller;
102
103impl Distiller for HeuristicDistiller {
104 fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>> {
105 let mut out = Vec::new();
106 for entry in log_entries {
107 let id = entry["id"].as_str().unwrap_or("").to_string();
108 let nomination = entry["nomination"].as_str();
109 let text = nomination.or_else(|| entry["output_summary"].as_str());
110 if let Some(t) = text {
111 let t = t.trim();
112 if !t.is_empty() {
113 let query = entry["query"].as_str().map(str::trim).unwrap_or("");
114 let outcome = entry["outcome"].as_str().unwrap_or("");
115
116 let trigger_desc = entry["query"]
120 .as_str()
121 .map(|q| q.trim().chars().take(80).collect::<String>())
122 .filter(|q| !q.is_empty())
123 .or_else(|| {
124 t.lines()
125 .map(str::trim)
126 .find(|l| l.len() > 10)
127 .map(|l| l.chars().take(80).collect())
128 });
129
130 let content = if nomination.is_some() {
133 t.to_string()
134 } else if outcome == "fail" {
135 format!("Avoid: {t}")
136 } else {
137 t.to_string()
138 };
139
140 let anti_trigger_desc = if outcome == "fail" && !query.is_empty() {
142 Some(query.chars().take(60).collect::<String>())
143 } else {
144 None
145 };
146
147 let skill_name = trigger_desc
149 .as_deref()
150 .map(|t| t.split_whitespace().take(3).collect::<Vec<_>>().join(" "))
151 .filter(|s| !s.is_empty());
152
153 out.push(DistilledChunk {
154 content,
155 skill_name,
156 trigger_desc,
157 anti_trigger_desc,
158 source_log_id: id,
159 nomination: entry["nomination"].as_str().map(str::to_string),
160 provider_override: None,
161 });
162 }
163 }
164 }
165 Ok(out)
166 }
167
168 fn provenance(&self) -> DistillProvenance {
169 DistillProvenance {
170 provider: Some("heuristic".to_string()),
171 model: None,
172 prompt_version: Some("3".to_string()),
173 }
174 }
175}
176
/// `Distiller` wrapper that tries a primary distiller first and can fall back
/// to a second one when the primary fails.
pub struct ResilientDistiller {
    /// Distiller tried first on every call; also the source of `provenance()`.
    primary: Arc<dyn Distiller>,
    /// Distiller used when `primary` returns an error and the attempt budget
    /// is exhausted.
    fallback: Arc<dyn Distiller>,
    /// Threshold compared (`>=`) against a log entry's `distill_attempts` value
    /// to decide whether falling back is allowed.
    llm_attempt_budget: i64,
}
199
200impl ResilientDistiller {
201 pub fn new(
202 primary: Arc<dyn Distiller>,
203 fallback: Arc<dyn Distiller>,
204 llm_attempt_budget: i64,
205 ) -> Self {
206 Self {
207 primary,
208 fallback,
209 llm_attempt_budget,
210 }
211 }
212
213 fn budget_exhausted(&self, log: &Value) -> bool {
214 log.get("distill_attempts")
215 .and_then(Value::as_i64)
216 .unwrap_or(0)
217 >= self.llm_attempt_budget
218 }
219
220 fn tag_fallback(mut chunks: Vec<DistilledChunk>) -> Vec<DistilledChunk> {
221 for c in &mut chunks {
222 c.provider_override = Some("heuristic_fallback".to_string());
223 }
224 chunks
225 }
226}
227
228impl Distiller for ResilientDistiller {
229 fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>> {
230 match self.primary.distill(log_entries) {
231 Ok(chunks) => Ok(chunks),
232 Err(e) => {
233 let exhausted = log_entries
234 .first()
235 .map(|l| self.budget_exhausted(l))
236 .unwrap_or(false);
237 if exhausted {
238 Ok(Self::tag_fallback(self.fallback.distill(log_entries)?))
239 } else {
240 Err(e)
241 }
242 }
243 }
244 }
245
246 fn distill_with_context(
247 &self,
248 primary: &Value,
249 related_logs: &[Value],
250 ) -> Result<Vec<DistilledChunk>> {
251 match self.primary.distill_with_context(primary, related_logs) {
252 Ok(chunks) => Ok(chunks),
253 Err(e) => {
254 if self.budget_exhausted(primary) {
255 Ok(Self::tag_fallback(
256 self.fallback.distill_with_context(primary, related_logs)?,
257 ))
258 } else {
259 Err(e)
260 }
261 }
262 }
263 }
264
265 fn provenance(&self) -> DistillProvenance {
266 self.primary.provenance()
267 }
268}