use crate::{AgentContext, AgentResult};
use car_inference::{GenerateParams, GenerateRequest};
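
/// Tuning knobs for summarization.
///
/// `target_tokens` is the approximate summary length requested from the
/// model, `temperature` controls sampling randomness, and `model`
/// optionally pins a specific model instead of the backend default.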
#[derive(Debug, Clone)]
pub struct SummaryConfig {
pub target_tokens: usize,
pub temperature: f64,
pub model: Option<String>,
}
impl Default for SummaryConfig {
fn default() -> Self {
Self {
target_tokens: 500,
temperature: 0.2,
model: None,
}
}
}
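
/// Condenses content and synthesizes final user-facing answers via the
/// inference backend in [`AgentContext`].
///
/// A minimal usage sketch (how an `AgentContext` is obtained is up to the
/// caller; the snippet is illustrative, not compiled):
///
/// ```ignore
/// let summarizer = Summarizer::new(ctx);
/// let result = summarizer
///     .summarize("...long document...", Some("error handling"))
///     .await;
/// println!("{} (confidence {})", result.output, result.confidence);
/// ```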
pub struct Summarizer {
ctx: AgentContext,
config: SummaryConfig,
}
impl Summarizer {
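    /// Creates a summarizer with the default [`SummaryConfig`].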
pub fn new(ctx: AgentContext) -> Self {
Self {
ctx,
config: SummaryConfig::default(),
}
}
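
    /// Creates a summarizer with an explicit configuration.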
pub fn with_config(ctx: AgentContext, config: SummaryConfig) -> Self {
Self { ctx, config }
}
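
    /// Summarizes `content` to roughly `config.target_tokens` tokens,
    /// optionally steering the summary toward `focus`.
    ///
    /// Confidence is heuristic: 0.8 when the summary compresses the input
    /// by more than 30%, 0.5 otherwise, and 0.0 on inference failure.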
pub async fn summarize(&self, content: &str, focus: Option<&str>) -> AgentResult {
let focus_instruction = focus
.map(|f| format!("\nFocus specifically on: {f}"))
.unwrap_or_default();
let prompt = format!(
"Summarize the following content in approximately {} tokens. \
Preserve all specific facts, numbers, names, and actionable items. \
Drop generic preamble and filler.{focus_instruction}\n\n\
Content:\n{content}",
self.config.target_tokens,
);
let start = std::time::Instant::now();
let req = GenerateRequest {
prompt,
model: self.config.model.clone(),
params: GenerateParams {
temperature: self.config.temperature,
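                // Leave headroom above the target; summaries often
                // overshoot the requested length.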
                max_tokens: self.config.target_tokens * 2,
                ..Default::default()
},
context: None,
tools: None,
images: None,
messages: None,
cache_control: false,
response_format: None,
intent: None,
};
match self.ctx.inference.generate_tracked(req).await {
Ok(result) => {
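                // Fraction by which the summary is shorter than the input;
                // used as a cheap quality signal for the confidence score.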
let compression = if !content.is_empty() {
1.0 - (result.text.len() as f64 / content.len() as f64)
} else {
0.0
};
AgentResult {
agent: "summarizer".into(),
output: result.text,
confidence: if compression > 0.3 { 0.8 } else { 0.5 },
model_used: result.model_used,
latency_ms: start.elapsed().as_millis() as u64,
}
}
Err(e) => AgentResult {
agent: "summarizer".into(),
output: format!("Summarization failed: {}", e),
confidence: 0.0,
model_used: String::new(),
latency_ms: start.elapsed().as_millis() as u64,
},
}
}
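
    /// Turns another agent's `research` notes into the final user-facing
    /// answer to `goal`. Short, broad goals ("review this codebase") get a
    /// fixed section structure; everything else gets a direct answer.
    ///
    /// Illustrative call (not compiled):
    ///
    /// ```ignore
    /// let answer = summarizer
    ///     .synthesize_answer(&research_notes, "review this codebase")
    ///     .await;
    /// ```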
pub async fn synthesize_answer(&self, research: &str, goal: &str) -> AgentResult {
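        // Cheap lexical heuristic: a short goal phrased like "review ..." or
        // "analyze the codebase" is treated as a broad review request.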
let g = goal.trim().to_lowercase();
let is_broad_review = g.split_whitespace().count() < 8
&& (g.starts_with("review")
|| g.starts_with("analy")
|| g.contains("codebase")
|| g.starts_with("describe")
|| g.starts_with("overview")
|| g == "what is this");
let structure_instruction = if is_broad_review {
"\nBecause this is a broad review ask, structure your answer with ALL of the following sections. Fill each with concrete specifics (file paths, symbol names, numbers). Do not skip any section.\n\
## Overview\n — one paragraph: what the project is and what it does, grounded in real components from the research.\n\
## Main Components\n — the major subsystems/services, each with its directory path and a one-line purpose.\n\
## Key Integrations\n — external systems (databases, auth, APIs, cloud services) and the files that handle them.\n\
## Top Risks or Gaps\n — 3–5 concrete things that look fragile, under-tested, or deserve attention. Cite files.\n\
## Recommended Next Actions\n — 3 high-value things a new engineer could do this week.\n"
} else {
""
};
let prompt = format!(
"You are writing the FINAL user-facing answer to a question about a codebase. \
Another agent has already done the research. Your job is to turn that research \
into a clear, direct, genuinely useful answer.\n\n\
Rules:\n\
1. ANSWER the user's question. Do not outline HOW to answer it. Do NOT return \
a list of steps or a workflow unless the user explicitly asked for steps.\n\
2. Be specific. Every claim should cite a file path, symbol, or number when \
the research supports it. Vague statements (\"well-organized\", \"robust\") \
are forbidden unless backed by evidence.\n\
3. Use markdown structure (headings, bullets, code spans) to make the answer \
scannable.\n\
4. If the research is thin on a point, say so — do not invent details.\n\
5. Lead with the answer. Minimal preamble.\n{structure_instruction}\n\
## User's question\n{goal}\n\n\
## Research\n{research}\n\n\
Now write the final answer:"
);
let start = std::time::Instant::now();
let req = GenerateRequest {
prompt,
model: self.config.model.clone(),
params: GenerateParams {
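                // Synthesis uses at least 0.3 temperature, even if the
                // configured value is lower.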
temperature: self.config.temperature.max(0.3),
max_tokens: 4096,
..Default::default()
},
context: None,
tools: None,
images: None,
messages: None,
cache_control: false,
response_format: None,
intent: None,
};
match self.ctx.inference.generate_tracked(req).await {
Ok(result) => AgentResult {
agent: "summarizer".into(),
output: result.text,
confidence: 0.85,
model_used: result.model_used,
latency_ms: start.elapsed().as_millis() as u64,
},
Err(e) => AgentResult {
agent: "summarizer".into(),
output: format!("Synthesis failed: {}", e),
confidence: 0.0,
model_used: String::new(),
latency_ms: start.elapsed().as_millis() as u64,
},
}
}
}