use crate::errors::Result;
use crate::utils::{sanitize, SanitizeAction};
use serde_json::Value;
use std::sync::Arc;
/// Thread-shareable (`Send + Sync`) strategy for sanitizing text content.
pub trait Sanitizer: Send + Sync {
/// Processes `content` and returns a `String` paired with the
/// `SanitizeAction` chosen for it.
///
/// NOTE(review): the exact meaning of each `SanitizeAction` variant is defined
/// in `crate::utils` and is not visible here — confirm before relying on it.
fn sanitize(&self, content: &str) -> (String, SanitizeAction);
}
/// Sanitizer backed by the `sanitize` helper imported from `crate::utils`.
pub struct DefaultSanitizer;
impl Sanitizer for DefaultSanitizer {
/// Forwards `content` to `crate::utils::sanitize` and returns its result unchanged.
fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
// The unqualified call resolves to the imported free function, not to this method.
sanitize(content)
}
}
/// Sanitizer that performs no filtering at all.
pub struct NoopSanitizer;

impl Sanitizer for NoopSanitizer {
    /// Returns an owned copy of `content` together with `SanitizeAction::Allow`.
    fn sanitize(&self, content: &str) -> (String, SanitizeAction) {
        let untouched = String::from(content);
        (untouched, SanitizeAction::Allow)
    }
}
/// Thread-shareable (`Send + Sync`) stage that post-processes a list of JSON chunks.
pub trait Refiner: Send + Sync {
/// Takes ownership of `chunks` and returns the refined list, or an error.
///
/// NOTE(review): `budget_tokens` is presumably an optional upper bound on the
/// refined output measured in tokens — confirm against implementors.
fn refine(&self, chunks: Vec<Value>, budget_tokens: Option<usize>) -> Result<Vec<Value>>;
/// Optional hook that receives a block of chunks, a query string and a token budget.
///
/// The default implementation ignores every argument and returns `None`.
/// NOTE(review): `None` presumably means "no trimmed result available" —
/// confirm with callers.
fn trim(&self, _block: &[Value], _query: &str, _budget_tokens: usize) -> Option<Vec<Value>> {
None
}
}
/// Refiner that leaves its input untouched.
pub struct NullRefiner;
impl Refiner for NullRefiner {
/// Returns `chunks` as-is wrapped in `Ok`; the budget argument is ignored.
fn refine(&self, chunks: Vec<Value>, _budget: Option<usize>) -> Result<Vec<Value>> {
Ok(chunks)
}
}
/// Thread-shareable (`Send + Sync`) strategy for ordering candidates against a query.
pub trait Reranker: Send + Sync {
/// Produces a list of strings for `candidates` given `query`, or an error.
///
/// NOTE(review): the returned strings are presumably candidate ids in ranked
/// order (the in-file `NoopReranker` returns each candidate's `"id"`) — confirm.
fn rerank(&self, query: &str, candidates: &[Value]) -> Result<Vec<String>>;
}
/// Reranker that keeps the candidates in the order they were supplied.
pub struct NoopReranker;

impl Reranker for NoopReranker {
    /// Returns the `"id"` of every candidate, in input order.
    ///
    /// Candidates whose `"id"` is missing or is not a JSON string are silently
    /// skipped. The query is ignored and this implementation never fails.
    fn rerank(&self, _query: &str, candidates: &[Value]) -> Result<Vec<String>> {
        let mut ids = Vec::new();
        for candidate in candidates {
            if let Some(id) = candidate.get("id").and_then(Value::as_str) {
                ids.push(id.to_owned());
            }
        }
        Ok(ids)
    }
}
/// Thread-shareable (`Send + Sync`) strategy that turns JSON log entries into
/// [`DistilledChunk`]s.
pub trait Distiller: Send + Sync {
    /// Distills a batch of log entries into zero or more chunks, or fails.
    fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>>;

    /// Distills a single `primary` entry, optionally using related entries.
    ///
    /// The default implementation ignores `_related_logs` and calls
    /// [`Distiller::distill`] with a one-element slice borrowed from `primary`.
    fn distill_with_context(
        &self,
        primary: &Value,
        _related_logs: &[Value],
    ) -> Result<Vec<DistilledChunk>> {
        // Borrow `primary` as a slice of length one; no clone is made.
        let only_primary = std::slice::from_ref(primary);
        self.distill(only_primary)
    }

    /// Describes what produced the chunks; the default has every field `None`.
    fn provenance(&self) -> DistillProvenance {
        Default::default()
    }
}
/// Metadata describing which distiller produced a set of chunks.
///
/// Every field is optional; the derived `Default` leaves them all `None`.
#[derive(Debug, Default, Clone)]
pub struct DistillProvenance {
/// Name of the producing backend (e.g. `HeuristicDistiller` reports `"heuristic"`).
pub provider: Option<String>,
/// Model identifier, if any (`HeuristicDistiller` reports `None`).
pub model: Option<String>,
/// Version tag of the prompt or rule set (`HeuristicDistiller` reports `"3"`).
pub prompt_version: Option<String>,
}
/// One unit of distilled output derived from a log entry.
#[derive(Debug, Clone, Default)]
pub struct DistilledChunk {
/// Text of the chunk.
pub content: String,
/// Short label for the chunk; `HeuristicDistiller` uses the first three
/// whitespace-separated words of `trigger_desc`.
pub skill_name: Option<String>,
/// NOTE(review): presumably describes when this chunk applies — confirm with consumers.
pub trigger_desc: Option<String>,
/// NOTE(review): presumably describes when this chunk should NOT apply — confirm
/// with consumers.
pub anti_trigger_desc: Option<String>,
/// Identifier of the originating log entry (`HeuristicDistiller` copies the
/// entry's `"id"`, or uses an empty string when it is absent).
pub source_log_id: String,
/// Raw `"nomination"` value from the log entry, when present.
pub nomination: Option<String>,
/// Provider label overriding the distiller's provenance for this chunk;
/// `ResilientDistiller` sets it to `"heuristic_fallback"` on fallback output.
pub provider_override: Option<String>,
}
/// Rule-based [`Distiller`] that assembles chunks directly from log-entry fields.
pub struct HeuristicDistiller;

impl Distiller for HeuristicDistiller {
    /// Produces at most one chunk per log entry; this implementation never fails.
    ///
    /// For each entry:
    /// * The source text is the `"nomination"` string if present, otherwise the
    ///   `"output_summary"` string. Entries with no text, or whose text is blank
    ///   after trimming, are skipped.
    /// * `trigger_desc` is the first 80 characters of the trimmed `"query"`, or,
    ///   when the query is absent or blank, the first 80 characters of the first
    ///   trimmed line of the text longer than 10 bytes.
    /// * `content` is the trimmed text, prefixed with `"Avoid: "` only when the
    ///   text came from `"output_summary"` and `"outcome"` is `"fail"`.
    /// * `anti_trigger_desc` is the first 60 characters of the query when the
    ///   outcome is `"fail"` and the query is non-blank.
    /// * `skill_name` is the first three whitespace-separated words of
    ///   `trigger_desc`.
    fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>> {
        let mut chunks = Vec::new();

        for entry in log_entries {
            let nomination = entry["nomination"].as_str();

            // A present nomination always takes precedence, even when it is
            // blank, in which case the entry is skipped rather than falling
            // back to `output_summary`.
            let body = match nomination.or_else(|| entry["output_summary"].as_str()) {
                Some(raw) if !raw.trim().is_empty() => raw.trim(),
                _ => continue,
            };

            let query = entry["query"].as_str().map(str::trim).unwrap_or("");
            let failed = entry["outcome"].as_str() == Some("fail");

            let trigger_desc: Option<String> = if query.is_empty() {
                // No usable query: fall back to the first reasonably long line.
                // The threshold compares byte length, the truncation counts chars.
                body.lines()
                    .map(str::trim)
                    .find(|line| line.len() > 10)
                    .map(|line| line.chars().take(80).collect())
            } else {
                Some(query.chars().take(80).collect())
            };

            // Only summaries of failed runs get the warning prefix; nominated
            // text is stored verbatim.
            let content = if failed && nomination.is_none() {
                format!("Avoid: {body}")
            } else {
                body.to_string()
            };

            let anti_trigger_desc =
                (failed && !query.is_empty()).then(|| query.chars().take(60).collect::<String>());

            let skill_name = trigger_desc.as_deref().and_then(|desc| {
                let words: Vec<&str> = desc.split_whitespace().take(3).collect();
                if words.is_empty() {
                    None
                } else {
                    Some(words.join(" "))
                }
            });

            chunks.push(DistilledChunk {
                content,
                skill_name,
                trigger_desc,
                anti_trigger_desc,
                source_log_id: entry["id"].as_str().unwrap_or("").to_owned(),
                nomination: nomination.map(str::to_owned),
                provider_override: None,
            });
        }

        Ok(chunks)
    }

    /// Reports provider `"heuristic"`, no model, and prompt version `"3"`.
    fn provenance(&self) -> DistillProvenance {
        DistillProvenance {
            provider: Some(String::from("heuristic")),
            model: None,
            prompt_version: Some(String::from("3")),
        }
    }
}
/// [`Distiller`] wrapper that can switch to a fallback distiller once a log
/// entry has used up its attempt budget.
pub struct ResilientDistiller {
    /// Distiller that is always tried first.
    primary: Arc<dyn Distiller>,
    /// Distiller used when the primary fails and the budget is exhausted.
    fallback: Arc<dyn Distiller>,
    /// Value of `distill_attempts` at or above which the budget counts as exhausted.
    llm_attempt_budget: i64,
}

impl ResilientDistiller {
    /// Builds a wrapper around `primary` and `fallback` with the given attempt budget.
    pub fn new(
        primary: Arc<dyn Distiller>,
        fallback: Arc<dyn Distiller>,
        llm_attempt_budget: i64,
    ) -> Self {
        Self {
            primary,
            fallback,
            llm_attempt_budget,
        }
    }

    /// Returns `true` when the entry's `"distill_attempts"` has reached the budget.
    ///
    /// A missing field, or one that is not representable as `i64`, counts as zero.
    fn budget_exhausted(&self, log: &Value) -> bool {
        let attempts = log
            .get("distill_attempts")
            .and_then(Value::as_i64)
            .unwrap_or(0);
        attempts >= self.llm_attempt_budget
    }

    /// Marks every chunk as fallback output by setting `provider_override`
    /// to `"heuristic_fallback"`.
    fn tag_fallback(chunks: Vec<DistilledChunk>) -> Vec<DistilledChunk> {
        chunks
            .into_iter()
            .map(|mut chunk| {
                chunk.provider_override = Some("heuristic_fallback".to_string());
                chunk
            })
            .collect()
    }
}
impl Distiller for ResilientDistiller {
    /// Runs the primary distiller and returns its chunks on success.
    ///
    /// On failure, the attempt budget of the *first* entry in `log_entries`
    /// decides what happens (an empty batch is treated as not exhausted):
    /// if it is exhausted the fallback distiller runs and its chunks are tagged
    /// via `tag_fallback`; otherwise the primary's error is returned. An error
    /// from the fallback is propagated in place of the primary's error.
    fn distill(&self, log_entries: &[Value]) -> Result<Vec<DistilledChunk>> {
        let primary_err = match self.primary.distill(log_entries) {
            Ok(chunks) => return Ok(chunks),
            Err(err) => err,
        };

        let exhausted = log_entries
            .first()
            .map_or(false, |first| self.budget_exhausted(first));
        if !exhausted {
            return Err(primary_err);
        }

        let recovered = self.fallback.distill(log_entries)?;
        Ok(Self::tag_fallback(recovered))
    }

    /// Same policy as `distill`, but for a single `primary` entry with context.
    ///
    /// The budget check is performed on `primary` itself; `related_logs` is
    /// passed through unchanged to whichever distiller runs.
    fn distill_with_context(
        &self,
        primary: &Value,
        related_logs: &[Value],
    ) -> Result<Vec<DistilledChunk>> {
        let primary_err = match self.primary.distill_with_context(primary, related_logs) {
            Ok(chunks) => return Ok(chunks),
            Err(err) => err,
        };

        if !self.budget_exhausted(primary) {
            return Err(primary_err);
        }

        let recovered = self.fallback.distill_with_context(primary, related_logs)?;
        Ok(Self::tag_fallback(recovered))
    }

    /// Reports the primary distiller's provenance; fallback output is instead
    /// identified per chunk through `provider_override`.
    fn provenance(&self) -> DistillProvenance {
        self.primary.provenance()
    }
}