1use crate::errors::Result;
2use crate::utils::{sanitize, SanitizeAction};
3use serde_json::Value;
4
5pub trait Sanitizer: Send + Sync {
12 fn sanitize(&self, content: &str) -> (String, SanitizeAction);
13}
14
/// [`Sanitizer`] that delegates to the crate's `utils::sanitize` function.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// either the unit literal or `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultSanitizer;
17
18impl Sanitizer for DefaultSanitizer {
19 fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
20 sanitize(content)
21 }
22}
23
/// [`Sanitizer`] that performs no sanitization: content is passed through
/// verbatim and always reported as `SanitizeAction::Allow`.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// either the unit literal or `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoopSanitizer;
26
27impl Sanitizer for NoopSanitizer {
28 fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
29 (content.to_string(), SanitizeAction::Allow)
30 }
31}
32
33pub trait Refiner: Send + Sync {
39 fn refine(&self, chunks: Vec<Value>, budget_tokens: Option<usize>) -> Result<Vec<Value>>;
40
41 fn trim(&self, _block: &[Value], _query: &str, _budget_tokens: usize) -> Option<Vec<Value>> {
45 None
46 }
47}
48
/// [`Refiner`] that leaves its input untouched: `refine` hands the chunks
/// back unchanged and the token budget is ignored.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// either the unit literal or `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NullRefiner;
51
52impl Refiner for NullRefiner {
53 fn refine(&self, chunks: Vec<Value>, _budget: Option<usize>) -> Result<Vec<Value>> {
54 Ok(chunks)
55 }
56}
57
58pub trait Distiller: Send + Sync {
60 fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>>;
61}
62
/// One chunk produced by a [`Distiller`] from a single log entry.
///
/// `PartialEq`/`Eq` are derived so chunks can be compared directly, e.g. in
/// tests or when de-duplicating distilled output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DistilledChunk {
    /// Text of the chunk.
    pub content: String,
    /// Optional short description associated with the chunk.
    /// NOTE(review): presumably describes when the chunk should be surfaced;
    /// confirm against the consumer.
    pub trigger_desc: Option<String>,
    /// Optional counterpart to `trigger_desc`.
    /// NOTE(review): presumably describes when the chunk should NOT be
    /// surfaced; confirm against the consumer.
    pub anti_trigger_desc: Option<String>,
    /// Identifier of the log entry the chunk was derived from.
    /// `HeuristicDistiller` stores an empty string when the entry has no
    /// string `id`.
    pub source_log_id: String,
    /// The source entry's `nomination` string, if it had one.
    pub nomination: Option<String>,
}
71
/// [`Distiller`] that builds chunks directly from log-entry fields
/// (`nomination`, `output_summary`, `query`, `id`) using simple string rules.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// either the unit literal or `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct HeuristicDistiller;
74
75impl Distiller for HeuristicDistiller {
76 fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>> {
77 let mut out = Vec::new();
78 for entry in log_entries {
79 let id = entry["id"].as_str().unwrap_or("").to_string();
80 let text = entry["nomination"]
82 .as_str()
83 .or_else(|| entry["output_summary"].as_str());
84 if let Some(t) = text {
85 let t = t.trim();
86 if !t.is_empty() {
87 let trigger_desc = entry["query"]
90 .as_str()
91 .map(|q| q.trim().chars().take(80).collect::<String>())
92 .filter(|q| !q.is_empty())
93 .or_else(|| {
94 t.lines()
95 .map(str::trim)
96 .find(|l| l.len() > 10)
97 .map(|l| l.chars().take(80).collect())
98 });
99 out.push(DistilledChunk {
100 content: t.to_string(),
101 trigger_desc,
102 anti_trigger_desc: None,
103 source_log_id: id,
104 nomination: entry["nomination"].as_str().map(str::to_string),
105 });
106 }
107 }
108 }
109 Ok(out)
110 }
111}