car-agents 0.14.0

Built-in commodity agents for Common Agent Runtime
//! Summarizer agent — compress context for handoff between agents.
//!
//! When Agent A produces a 10K-token output and Agent B needs only the key
//! points, the Summarizer bridges them. Essential for pipeline workflows
//! where context grows with each step.
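//!
//! A minimal handoff sketch (illustrative: `ctx` is an `AgentContext` built
//! by the runtime, and `agent_a_output` stands in for the upstream text):
//!
//! ```ignore
//! let summarizer = Summarizer::new(ctx);
//! // Compress Agent A's output before handing it to Agent B.
//! let handoff = summarizer.summarize(&agent_a_output, None).await;
//! let key_points_for_agent_b = handoff.output;
//! ```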

use crate::{AgentContext, AgentResult};
use car_inference::{GenerateParams, GenerateRequest};

/// Summarizer configuration.
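///
/// A construction sketch (values are illustrative; `ctx` is an `AgentContext`
/// provided by the runtime):
///
/// ```ignore
/// let config = SummaryConfig {
///     target_tokens: 200,        // tighter handoff budget
///     ..SummaryConfig::default() // keep temperature 0.2, no model override
/// };
/// let summarizer = Summarizer::with_config(ctx, config);
/// ```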
#[derive(Debug, Clone)]
pub struct SummaryConfig {
    /// Approximate target length of the summary, in tokens.
    pub target_tokens: usize,
    /// Sampling temperature; kept low by default so summaries stay faithful.
    pub temperature: f64,
    /// Optional model override; `None` lets the runtime choose.
    pub model: Option<String>,
}

impl Default for SummaryConfig {
    fn default() -> Self {
        Self {
            target_tokens: 500,
            temperature: 0.2,
            model: None,
        }
    }
}

/// Summarizer: long context → compressed handoff.
pub struct Summarizer {
    ctx: AgentContext,
    config: SummaryConfig,
}

impl Summarizer {
    /// Create a summarizer with the default configuration.
    pub fn new(ctx: AgentContext) -> Self {
        Self {
            ctx,
            config: SummaryConfig::default(),
        }
    }

    /// Create a summarizer with an explicit configuration.
    pub fn with_config(ctx: AgentContext, config: SummaryConfig) -> Self {
        Self { ctx, config }
    }

    /// Summarize content, optionally focused on a specific aspect.
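    ///
    /// A focused-call sketch (`long_report` and the focus string are
    /// illustrative):
    ///
    /// ```ignore
    /// let result = summarizer
    ///     .summarize(&long_report, Some("open security findings"))
    ///     .await;
    /// // confidence is 0.8 if the summary compressed by >30%, else 0.5.
    /// ```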
    pub async fn summarize(&self, content: &str, focus: Option<&str>) -> AgentResult {
        let focus_instruction = focus
            .map(|f| format!("\nFocus specifically on: {f}"))
            .unwrap_or_default();

        let prompt = format!(
            "Summarize the following content in approximately {} tokens. \
            Preserve all specific facts, numbers, names, and actionable items. \
            Drop generic preamble and filler.{focus_instruction}\n\n\
            Content:\n{content}",
            self.config.target_tokens,
        );

        let start = std::time::Instant::now();
        let req = GenerateRequest {
            prompt,
            model: self.config.model.clone(),
            params: GenerateParams {
                temperature: self.config.temperature,
                max_tokens: self.config.target_tokens * 2, // headroom so the summary isn't clipped mid-sentence
                ..Default::default()
            },
            context: None,
            tools: None,
            images: None,
            messages: None,
            cache_control: false,
            response_format: None,
            intent: None,
        };

        match self.ctx.inference.generate_tracked(req).await {
            Ok(result) => {
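                // Rough compression heuristic: byte-length ratio of summary to
                // source (negative if the "summary" grew); it only gates the
                // confidence score below.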
                let compression = if !content.is_empty() {
                    1.0 - (result.text.len() as f64 / content.len() as f64)
                } else {
                    0.0
                };
                AgentResult {
                    agent: "summarizer".into(),
                    output: result.text,
                    confidence: if compression > 0.3 { 0.8 } else { 0.5 },
                    model_used: result.model_used,
                    latency_ms: start.elapsed().as_millis() as u64,
                }
            }
            Err(e) => AgentResult {
                agent: "summarizer".into(),
                output: format!("Summarization failed: {}", e),
                confidence: 0.0,
                model_used: String::new(),
                latency_ms: start.elapsed().as_millis() as u64,
            },
        }
    }

    /// Synthesize a direct, user-facing answer from upstream research.
    ///
    /// Unlike `summarize()` (which compresses content for inter-agent handoff),
    /// this is for the FINAL step of a pipeline: the output is what the user
    /// actually sees. We use a different prompt that tells the LLM to write
    /// an answer — not to condense, not to drop "preamble", and explicitly
    /// NOT to turn the content into an ordered checklist of steps unless the
    /// user asked for steps.
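    ///
    /// A final-step sketch (`research_notes` is the upstream agent's output;
    /// the goal string is illustrative):
    ///
    /// ```ignore
    /// let answer = summarizer
    ///     .synthesize_answer(&research_notes, "review this codebase")
    ///     .await;
    /// // `answer.output` is user-facing markdown, not a handoff summary.
    /// ```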
    pub async fn synthesize_answer(&self, research: &str, goal: &str) -> AgentResult {
        // Detect broad review-shaped goals — these deserve a structured
        // multi-section answer rather than a one-paragraph summary.
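        // e.g. "review this codebase" matches; "where is the retry logic
        // implemented" does not (no review-style keyword).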
        let g = goal.trim().to_lowercase();
        let is_broad_review = g.split_whitespace().count() < 8
            && (g.starts_with("review")
                || g.starts_with("analy")
                || g.contains("codebase")
                || g.starts_with("describe")
                || g.starts_with("overview")
                || g == "what is this");

        let structure_instruction = if is_broad_review {
            "\nBecause this is a broad review ask, structure your answer with ALL of the following sections. Fill each with concrete specifics (file paths, symbol names, numbers). Do not skip any section.\n\
             ## Overview\n  — one paragraph: what the project is and what it does, grounded in real components from the research.\n\
             ## Main Components\n  — the major subsystems/services, each with its directory path and a one-line purpose.\n\
             ## Key Integrations\n  — external systems (databases, auth, APIs, cloud services) and the files that handle them.\n\
             ## Top Risks or Gaps\n  — 3–5 concrete things that look fragile, under-tested, or deserve attention. Cite files.\n\
             ## Recommended Next Actions\n  — 3 high-value things a new engineer could do this week.\n"
        } else {
            ""
        };

        let prompt = format!(
            "You are writing the FINAL user-facing answer to a question about a codebase. \
            Another agent has already done the research. Your job is to turn that research \
            into a clear, direct, genuinely useful answer.\n\n\
            Rules:\n\
            1. ANSWER the user's question. Do not outline HOW to answer it. Do NOT return \
               a list of steps or a workflow unless the user explicitly asked for steps.\n\
            2. Be specific. Every claim should cite a file path, symbol, or number when \
               the research supports it. Vague statements (\"well-organized\", \"robust\") \
               are forbidden unless backed by evidence.\n\
            3. Use markdown structure (headings, bullets, code spans) to make the answer \
               scannable.\n\
            4. If the research is thin on a point, say so — do not invent details.\n\
            5. Lead with the answer. Minimal preamble.\n{structure_instruction}\n\
            ## User's question\n{goal}\n\n\
            ## Research\n{research}\n\n\
            Now write the final answer:"
        );

        let start = std::time::Instant::now();
        let req = GenerateRequest {
            prompt,
            model: self.config.model.clone(),
            params: GenerateParams {
                temperature: self.config.temperature.max(0.3),
                // Final answers can be much longer than a handoff summary. Cap
                // high so the LLM doesn't truncate mid-sentence on rich research.
                max_tokens: 4096,
                ..Default::default()
            },
            context: None,
            tools: None,
            images: None,
            messages: None,
            cache_control: false,
            response_format: None,
            intent: None,
        };

        match self.ctx.inference.generate_tracked(req).await {
            Ok(result) => AgentResult {
                agent: "summarizer".into(),
                output: result.text,
                confidence: 0.85,
                model_used: result.model_used,
                latency_ms: start.elapsed().as_millis() as u64,
            },
            Err(e) => AgentResult {
                agent: "summarizer".into(),
                output: format!("Synthesis failed: {}", e),
                confidence: 0.0,
                model_used: String::new(),
                latency_ms: start.elapsed().as_millis() as u64,
            },
        }
    }
}