
car_agents/summarizer.rs

//! Summarizer agent — compress context for handoff between agents.
//!
//! When Agent A produces a 10K-token output and Agent B needs only the key
//! points, the Summarizer bridges them. Essential for pipeline workflows
//! where context grows with each step.
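//!
//! A minimal usage sketch (the `AgentContext` construction and the
//! downstream `agent_b` call are assumed, not part of this module):
//!
//! ```ignore
//! let summarizer = Summarizer::new(ctx);
//! // Compress Agent A's raw output before handing it to Agent B.
//! let handoff = summarizer.summarize(&agent_a_output, None).await;
//! agent_b.run(&handoff.output).await;
//! ```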

use crate::{AgentContext, AgentResult};
use car_inference::{GenerateParams, GenerateRequest};

/// Summarizer configuration.
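///
/// A sketch of overriding the defaults (the values here are illustrative,
/// not tuned recommendations):
///
/// ```ignore
/// let config = SummaryConfig {
///     target_tokens: 200, // tighter budget for a short handoff
///     ..SummaryConfig::default()
/// };
/// let summarizer = Summarizer::with_config(ctx, config);
/// ```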
#[derive(Debug, Clone)]
pub struct SummaryConfig {
    /// Target length for the summary (in approximate tokens).
    pub target_tokens: usize,
    /// Sampling temperature; kept low by default so summaries stay faithful.
    pub temperature: f64,
    /// Optional model override; `None` defers to the inference layer's default.
    pub model: Option<String>,
}

impl Default for SummaryConfig {
    fn default() -> Self {
        Self {
            target_tokens: 500,
            temperature: 0.2,
            model: None,
        }
    }
}

/// Summarizer: long context → compressed handoff.
pub struct Summarizer {
    ctx: AgentContext,
    config: SummaryConfig,
}

impl Summarizer {
    /// Create a summarizer with the default configuration.
    pub fn new(ctx: AgentContext) -> Self {
        Self {
            ctx,
            config: SummaryConfig::default(),
        }
    }

    /// Create a summarizer with an explicit configuration.
    pub fn with_config(ctx: AgentContext, config: SummaryConfig) -> Self {
        Self { ctx, config }
    }

    /// Summarize content, optionally focused on a specific aspect.
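    ///
    /// Sketch of a focused call (the variable names are illustrative):
    ///
    /// ```ignore
    /// let result = summarizer
    ///     .summarize(&long_report, Some("open action items"))
    ///     .await;
    /// assert_eq!(result.agent, "summarizer");
    /// ```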
    pub async fn summarize(&self, content: &str, focus: Option<&str>) -> AgentResult {
        let focus_instruction = focus
            .map(|f| format!("\nFocus specifically on: {f}"))
            .unwrap_or_default();

        let prompt = format!(
            "Summarize the following content in approximately {} tokens. \
            Preserve all specific facts, numbers, names, and actionable items. \
            Drop generic preamble and filler.{focus_instruction}\n\n\
            Content:\n{content}",
            self.config.target_tokens,
        );

        let start = std::time::Instant::now();
        let req = GenerateRequest {
            prompt,
            model: self.config.model.clone(),
            params: GenerateParams {
                temperature: self.config.temperature,
                max_tokens: self.config.target_tokens * 2, // headroom so the summary isn't cut off mid-sentence
                ..Default::default()
            },
            context: None,
            tools: None,
            images: None,
            messages: None,
            cache_control: false,
            response_format: None,
            intent: None,
        };

        match self.ctx.inference.generate_tracked(req).await {
            Ok(result) => {
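                // Byte-length ratio as a cheap compression proxy: 1.0 would be
                // an empty summary, 0.0 no shrinkage at all, and the value goes
                // negative if the "summary" came back longer than the input.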
                let compression = if !content.is_empty() {
                    1.0 - (result.text.len() as f64 / content.len() as f64)
                } else {
                    0.0
                };
                AgentResult {
                    agent: "summarizer".into(),
                    output: result.text,
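                    // Heuristic: >30% shrinkage suggests real condensation;
                    // anything less may be a near-restatement of the input.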
                    confidence: if compression > 0.3 { 0.8 } else { 0.5 },
                    model_used: result.model_used,
                    latency_ms: start.elapsed().as_millis() as u64,
                }
            }
            Err(e) => AgentResult {
                agent: "summarizer".into(),
                output: format!("Summarization failed: {e}"),
                confidence: 0.0,
                model_used: String::new(),
                latency_ms: start.elapsed().as_millis() as u64,
            },
        }
    }

    /// Synthesize a direct, user-facing answer from upstream research.
    ///
    /// Unlike `summarize()` (which compresses content for inter-agent handoff),
    /// this is for the FINAL step of a pipeline: the output is what the user
    /// actually sees. We use a different prompt that tells the LLM to write
    /// an answer — not to condense, not to drop "preamble", and explicitly
    /// NOT to turn the content into an ordered checklist of steps unless the
    /// user asked for steps.
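    ///
    /// Sketch of the final pipeline step (variable names are illustrative):
    ///
    /// ```ignore
    /// // `research` holds the upstream agent's findings; the goal is the
    /// // user's original question.
    /// let answer = summarizer
    ///     .synthesize_answer(&research, "review this codebase")
    ///     .await;
    /// println!("{}", answer.output);
    /// ```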
    pub async fn synthesize_answer(&self, research: &str, goal: &str) -> AgentResult {
        // Detect broad review-shaped goals — these deserve a structured
        // multi-section answer rather than a one-paragraph summary.
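        // Short asks like "review this codebase" or "analyze the repo" match;
        // specific questions ("why does login retry in a loop?") fall through
        // to the default free-form answer.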
        let g = goal.trim().to_lowercase();
        let is_broad_review = g.split_whitespace().count() < 8
            && (g.starts_with("review")
                || g.starts_with("analy")
                || g.contains("codebase")
                || g.starts_with("describe")
                || g.starts_with("overview")
                // prefix match so "what is this?" and "what is this repo" count
                || g.starts_with("what is this"));

        let structure_instruction = if is_broad_review {
            "\nBecause this is a broad review ask, structure your answer with ALL of the following sections. Fill each with concrete specifics (file paths, symbol names, numbers). Do not skip any section.\n\
             ## Overview\n  — one paragraph: what the project is and what it does, grounded in real components from the research.\n\
             ## Main Components\n  — the major subsystems/services, each with its directory path and a one-line purpose.\n\
             ## Key Integrations\n  — external systems (databases, auth, APIs, cloud services) and the files that handle them.\n\
             ## Top Risks or Gaps\n  — 3–5 concrete things that look fragile, under-tested, or deserve attention. Cite files.\n\
             ## Recommended Next Actions\n  — 3 high-value things a new engineer could do this week.\n"
        } else {
            ""
        };

        let prompt = format!(
            "You are writing the FINAL user-facing answer to a question about a codebase. \
            Another agent has already done the research. Your job is to turn that research \
            into a clear, direct, genuinely useful answer.\n\n\
            Rules:\n\
            1. ANSWER the user's question. Do not outline HOW to answer it. Do NOT return \
               a list of steps or a workflow unless the user explicitly asked for steps.\n\
            2. Be specific. Every claim should cite a file path, symbol, or number when \
               the research supports it. Vague statements (\"well-organized\", \"robust\") \
               are forbidden unless backed by evidence.\n\
            3. Use markdown structure (headings, bullets, code spans) to make the answer \
               scannable.\n\
            4. If the research is thin on a point, say so — do not invent details.\n\
            5. Lead with the answer. Minimal preamble.\n{structure_instruction}\n\
            ## User's question\n{goal}\n\n\
            ## Research\n{research}\n\n\
            Now write the final answer:"
        );

        let start = std::time::Instant::now();
        let req = GenerateRequest {
            prompt,
            model: self.config.model.clone(),
            params: GenerateParams {
                // Floor the temperature at 0.3 for the user-facing answer;
                // handoff summaries run colder than final prose.
                temperature: self.config.temperature.max(0.3),
                // Final answers can be much longer than a handoff summary. Cap
                // high so the LLM doesn't truncate mid-sentence on rich research.
                max_tokens: 4096,
                ..Default::default()
            },
            context: None,
            tools: None,
            images: None,
            messages: None,
            cache_control: false,
            response_format: None,
            intent: None,
        };

        match self.ctx.inference.generate_tracked(req).await {
            Ok(result) => AgentResult {
                agent: "summarizer".into(),
                output: result.text,
                confidence: 0.85,
                model_used: result.model_used,
                latency_ms: start.elapsed().as_millis() as u64,
            },
            Err(e) => AgentResult {
                agent: "summarizer".into(),
                output: format!("Synthesis failed: {e}"),
                confidence: 0.0,
                model_used: String::new(),
                latency_ms: start.elapsed().as_millis() as u64,
            },
        }
    }
}