Skip to main content

rover/mcp/tools/
summarize.rs

//! MCP `summarize` tool.
//!
//! Cache-or-fetch the page (M2/M3 cache hot path), dispatch through
//! [`SummarizerService`] (Task 7), then render the response envelope.
//! Every step is awaited inline within the tool call; no task spawning.
6
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9use url::Url;
10
11use crate::extractor::pipeline::extract;
12use crate::fetcher::cached::{ExtractResult, FetchOptions, fetch_with_cache, sha256_hex};
13use crate::mcp::envelope::{
14    SummarizeMetadata, SummarizeResponse, SummarizerFallbackInfo, SummaryCacheStatusWire,
15};
16use crate::mcp::error::McpError;
17use crate::mcp::handler::{RoverHandler, resolve_tokenizer};
18use crate::summarizer::backend::{CompactMode, PreserveSection, Style};
19use crate::summarizer::{DefaultsHint, SummaryCacheStatus};
20use crate::tokenizer;
21
22/// Wire-side `summarize` args. All fields except `url` are optional;
23/// defaults come from `[summarization]`.
24#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
25#[serde(deny_unknown_fields)]
26pub struct SummarizeArgs {
27    pub url: String,
28
29    #[serde(default)]
30    pub target_tokens: Option<usize>,
31
32    #[serde(default)]
33    pub mode: Option<SummarizeMode>,
34
35    #[serde(default)]
36    pub focus: Option<String>,
37
38    #[serde(default)]
39    pub preserve: Vec<SummarizePreserve>,
40
41    #[serde(default)]
42    pub style: Option<SummarizeStyle>,
43
44    #[serde(default)]
45    pub backend: Option<String>,
46
47    #[serde(default)]
48    pub tokenizer: Option<String>,
49
50    #[serde(default)]
51    pub security: Option<crate::guard::SecurityArg>,
52}
53
54#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
55#[serde(rename_all = "snake_case")]
56pub enum SummarizeMode {
57    Extractive,
58    Abstractive,
59    Headlines,
60}
61
62impl From<SummarizeMode> for CompactMode {
63    fn from(v: SummarizeMode) -> Self {
64        match v {
65            SummarizeMode::Extractive => CompactMode::Extractive,
66            SummarizeMode::Abstractive => CompactMode::Abstractive,
67            SummarizeMode::Headlines => CompactMode::Headlines,
68        }
69    }
70}
71
72#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
73#[serde(rename_all = "snake_case")]
74pub enum SummarizeStyle {
75    Bullet,
76    Prose,
77    Executive,
78}
79
80impl From<SummarizeStyle> for Style {
81    fn from(v: SummarizeStyle) -> Self {
82        match v {
83            SummarizeStyle::Bullet => Style::Bullet,
84            SummarizeStyle::Prose => Style::Prose,
85            SummarizeStyle::Executive => Style::Executive,
86        }
87    }
88}
89
90#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
91#[serde(rename_all = "snake_case")]
92pub enum SummarizePreserve {
93    Code,
94    Tables,
95    Quotes,
96    Lists,
97}
98
99impl From<SummarizePreserve> for PreserveSection {
100    fn from(v: SummarizePreserve) -> Self {
101        match v {
102            SummarizePreserve::Code => PreserveSection::Code,
103            SummarizePreserve::Tables => PreserveSection::Tables,
104            SummarizePreserve::Quotes => PreserveSection::Quotes,
105            SummarizePreserve::Lists => PreserveSection::Lists,
106        }
107    }
108}
109
110impl RoverHandler {
111    /// Tool body, decoupled from the `#[tool]` macro for unit testing.
112    pub async fn summarize_inner(
113        &self,
114        args: SummarizeArgs,
115    ) -> Result<SummarizeResponse, McpError> {
116        let url = Url::parse(&args.url).map_err(|e| McpError::InvalidUrl(e.to_string()))?;
117        let family = resolve_tokenizer(args.tokenizer.as_deref(), &self.config)?;
118        tokenizer::ensure_loaded(family).await?;
119
120        // Cache-or-fetch the page.
121        let result = fetch_with_cache(
122            &self.db,
123            &self.client,
124            &self.pacer,
125            &self.config.rate_limit,
126            &self.config.robots,
127            &url,
128            &self.config.cache,
129            FetchOptions {
130                force_refresh: false,
131                ssrf_level: self.ssrf_level,
132                ssrf_project_root: self.ssrf_project_root.clone(),
133                har_recorder: self.har_recorder.clone(),
134                ignore_robots: false,
135                user_agent: self.config.fetch.user_agent.clone(),
136                #[cfg(feature = "headless")]
137                headless: None,
138                headless_mode: crate::fetcher::HeadlessMode::Off,
139                synchronous_revalidation: false,
140            },
141            |body, base| {
142                let extracted =
143                    extract(body, Some(base)).map_err(crate::fetcher::FetcherError::Extract)?;
144                let content_hash = format!("sha256:{}", sha256_hex(extracted.body_md.as_bytes()));
145                Ok(ExtractResult {
146                    title: extracted.title,
147                    body_md: extracted.body_md,
148                    content_hash,
149                    metadata: extracted.metadata,
150                })
151            },
152        )
153        .await?;
154
155        let defaults = DefaultsHint::from_config(&self.config.summarization);
156        let opts = self.summarizer.resolve_defaults(
157            args.mode.map(Into::into),
158            args.style.map(Into::into),
159            args.target_tokens,
160            args.focus,
161            args.preserve.into_iter().map(Into::into).collect(),
162            args.backend,
163            &defaults,
164        );
165
166        let summary = self
167            .summarizer
168            .compact(&result.page.content_hash, &result.page.extracted_md, &opts)
169            .await?;
170
171        let estimated_tokens = tokenizer::count(&summary.summary_md, family)?;
172
173        let assessment =
174            self.guard
175                .assess(url.as_str(), args.security.as_ref(), &summary.summary_md);
176        let content = self
177            .guard
178            .finish(&assessment, "", &assessment.acted_body, true);
179
180        Ok(SummarizeResponse {
181            content,
182            metadata: SummarizeMetadata {
183                backend: summary.effective_backend,
184                mode: opts.mode.as_str().to_string(),
185                style: opts.style.as_str().to_string(),
186                target_tokens: opts.target_tokens,
187                estimated_tokens,
188                cache_status: match summary.cache_status {
189                    SummaryCacheStatus::Hit => SummaryCacheStatusWire::Hit,
190                    SummaryCacheStatus::Miss => SummaryCacheStatusWire::Miss,
191                },
192                summarizer_fallback: summary.fallback.map(|f| SummarizerFallbackInfo {
193                    from: f.from,
194                    reason: f.reason.to_string(),
195                }),
196                source_url: url.as_str().to_string(),
197                source_fetched_at: jiff::Timestamp::from_second(result.page.fetched_at)
198                    .map(|t| t.to_string())
199                    .unwrap_or_default(),
200                focus: opts.focus,
201                preserve: opts
202                    .preserve
203                    .iter()
204                    .map(|p| p.as_str().to_string())
205                    .collect(),
206                prompt_injection: assessment.telemetry,
207            },
208        })
209    }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Every argument must be a schema property, and `url` must be required.
    ///
    /// Property keys are checked rather than substrings of the serialized
    /// schema: the struct's doc comment ends up in the schema text and
    /// mentions `url`, so a substring match would pass even if the property
    /// were gone.
    #[test]
    fn schema_round_trips_required_fields() {
        let schema = schemars::schema_for!(SummarizeArgs);
        let value = serde_json::to_value(&schema).unwrap();
        let properties = value
            .get("properties")
            .and_then(serde_json::Value::as_object)
            .expect("schema should describe an object with properties");
        for f in [
            "url",
            "target_tokens",
            "mode",
            "focus",
            "preserve",
            "style",
            "backend",
            "tokenizer",
            "security",
        ] {
            assert!(properties.contains_key(f), "missing {f}");
        }

        let required = value
            .get("required")
            .and_then(serde_json::Value::as_array)
            .expect("schema should list required fields");
        assert!(
            required.iter().any(|name| name.as_str() == Some("url")),
            "`url` should be required",
        );
    }

    /// Each wire enum converts to the backend variant of the same name.
    #[test]
    fn enum_mappings_round_trip() {
        for (wire, expected) in [
            (SummarizeMode::Extractive, CompactMode::Extractive),
            (SummarizeMode::Abstractive, CompactMode::Abstractive),
            (SummarizeMode::Headlines, CompactMode::Headlines),
        ] {
            assert_eq!(CompactMode::from(wire), expected);
        }
        for (wire, expected) in [
            (SummarizeStyle::Bullet, Style::Bullet),
            (SummarizeStyle::Prose, Style::Prose),
            (SummarizeStyle::Executive, Style::Executive),
        ] {
            assert_eq!(Style::from(wire), expected);
        }
        for (wire, expected) in [
            (SummarizePreserve::Code, PreserveSection::Code),
            (SummarizePreserve::Tables, PreserveSection::Tables),
            (SummarizePreserve::Quotes, PreserveSection::Quotes),
            (SummarizePreserve::Lists, PreserveSection::Lists),
        ] {
            assert_eq!(PreserveSection::from(wire), expected);
        }
    }

    #[test]
    fn rejects_unknown_field() {
        let r: Result<SummarizeArgs, _> = serde_json::from_str(r#"{"url":"https://x/","bogus":1}"#);
        assert!(r.is_err());
    }

    /// Only `url` has to be supplied; everything else falls back to
    /// `None` or an empty list.
    #[test]
    fn optional_fields_default_when_omitted() {
        let args: SummarizeArgs = serde_json::from_str(r#"{"url":"https://x/"}"#).unwrap();
        assert_eq!(args.url, "https://x/");
        assert!(args.target_tokens.is_none());
        assert!(args.mode.is_none());
        assert!(args.focus.is_none());
        assert!(args.preserve.is_empty());
        assert!(args.style.is_none());
        assert!(args.backend.is_none());
        assert!(args.tokenizer.is_none());
        assert!(args.security.is_none());
    }

    /// Enum values are accepted in their `snake_case` wire spelling.
    #[test]
    fn enums_use_snake_case_on_the_wire() {
        let args: SummarizeArgs = serde_json::from_value(serde_json::json!({
            "url": "https://x/",
            "mode": "headlines",
            "style": "executive",
            "preserve": ["code", "tables"],
        }))
        .unwrap();
        assert!(matches!(args.mode, Some(SummarizeMode::Headlines)));
        assert!(matches!(args.style, Some(SummarizeStyle::Executive)));
        assert!(matches!(
            args.preserve.as_slice(),
            [SummarizePreserve::Code, SummarizePreserve::Tables]
        ));
    }
}