1use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9use url::Url;
10
11use crate::extractor::pipeline::extract;
12use crate::fetcher::cached::{ExtractResult, FetchOptions, fetch_with_cache, sha256_hex};
13use crate::mcp::envelope::{
14 SummarizeMetadata, SummarizeResponse, SummarizerFallbackInfo, SummaryCacheStatusWire,
15};
16use crate::mcp::error::McpError;
17use crate::mcp::handler::{RoverHandler, resolve_tokenizer};
18use crate::summarizer::backend::{CompactMode, PreserveSection, Style};
19use crate::summarizer::{DefaultsHint, SummaryCacheStatus};
20use crate::tokenizer;
21
22#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
25#[serde(deny_unknown_fields)]
26pub struct SummarizeArgs {
27 pub url: String,
28
29 #[serde(default)]
30 pub target_tokens: Option<usize>,
31
32 #[serde(default)]
33 pub mode: Option<SummarizeMode>,
34
35 #[serde(default)]
36 pub focus: Option<String>,
37
38 #[serde(default)]
39 pub preserve: Vec<SummarizePreserve>,
40
41 #[serde(default)]
42 pub style: Option<SummarizeStyle>,
43
44 #[serde(default)]
45 pub backend: Option<String>,
46
47 #[serde(default)]
48 pub tokenizer: Option<String>,
49
50 #[serde(default)]
51 pub security: Option<crate::guard::SecurityArg>,
52}
53
54#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
55#[serde(rename_all = "snake_case")]
56pub enum SummarizeMode {
57 Extractive,
58 Abstractive,
59 Headlines,
60}
61
62impl From<SummarizeMode> for CompactMode {
63 fn from(v: SummarizeMode) -> Self {
64 match v {
65 SummarizeMode::Extractive => CompactMode::Extractive,
66 SummarizeMode::Abstractive => CompactMode::Abstractive,
67 SummarizeMode::Headlines => CompactMode::Headlines,
68 }
69 }
70}
71
72#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
73#[serde(rename_all = "snake_case")]
74pub enum SummarizeStyle {
75 Bullet,
76 Prose,
77 Executive,
78}
79
80impl From<SummarizeStyle> for Style {
81 fn from(v: SummarizeStyle) -> Self {
82 match v {
83 SummarizeStyle::Bullet => Style::Bullet,
84 SummarizeStyle::Prose => Style::Prose,
85 SummarizeStyle::Executive => Style::Executive,
86 }
87 }
88}
89
90#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
91#[serde(rename_all = "snake_case")]
92pub enum SummarizePreserve {
93 Code,
94 Tables,
95 Quotes,
96 Lists,
97}
98
99impl From<SummarizePreserve> for PreserveSection {
100 fn from(v: SummarizePreserve) -> Self {
101 match v {
102 SummarizePreserve::Code => PreserveSection::Code,
103 SummarizePreserve::Tables => PreserveSection::Tables,
104 SummarizePreserve::Quotes => PreserveSection::Quotes,
105 SummarizePreserve::Lists => PreserveSection::Lists,
106 }
107 }
108}
109
110impl RoverHandler {
111 pub async fn summarize_inner(
113 &self,
114 args: SummarizeArgs,
115 ) -> Result<SummarizeResponse, McpError> {
116 let url = Url::parse(&args.url).map_err(|e| McpError::InvalidUrl(e.to_string()))?;
117 let family = resolve_tokenizer(args.tokenizer.as_deref(), &self.config)?;
118 tokenizer::ensure_loaded(family).await?;
119
120 let result = fetch_with_cache(
122 &self.db,
123 &self.client,
124 &self.pacer,
125 &self.config.rate_limit,
126 &self.config.robots,
127 &url,
128 &self.config.cache,
129 FetchOptions {
130 force_refresh: false,
131 ssrf_level: self.ssrf_level,
132 ssrf_project_root: self.ssrf_project_root.clone(),
133 har_recorder: self.har_recorder.clone(),
134 ignore_robots: false,
135 user_agent: self.config.fetch.user_agent.clone(),
136 #[cfg(feature = "headless")]
137 headless: None,
138 headless_mode: crate::fetcher::HeadlessMode::Off,
139 synchronous_revalidation: false,
140 },
141 |body, base| {
142 let extracted =
143 extract(body, Some(base)).map_err(crate::fetcher::FetcherError::Extract)?;
144 let content_hash = format!("sha256:{}", sha256_hex(extracted.body_md.as_bytes()));
145 Ok(ExtractResult {
146 title: extracted.title,
147 body_md: extracted.body_md,
148 content_hash,
149 metadata: extracted.metadata,
150 })
151 },
152 )
153 .await?;
154
155 let defaults = DefaultsHint::from_config(&self.config.summarization);
156 let opts = self.summarizer.resolve_defaults(
157 args.mode.map(Into::into),
158 args.style.map(Into::into),
159 args.target_tokens,
160 args.focus,
161 args.preserve.into_iter().map(Into::into).collect(),
162 args.backend,
163 &defaults,
164 );
165
166 let summary = self
167 .summarizer
168 .compact(&result.page.content_hash, &result.page.extracted_md, &opts)
169 .await?;
170
171 let estimated_tokens = tokenizer::count(&summary.summary_md, family)?;
172
173 let assessment =
174 self.guard
175 .assess(url.as_str(), args.security.as_ref(), &summary.summary_md);
176 let content = self
177 .guard
178 .finish(&assessment, "", &assessment.acted_body, true);
179
180 Ok(SummarizeResponse {
181 content,
182 metadata: SummarizeMetadata {
183 backend: summary.effective_backend,
184 mode: opts.mode.as_str().to_string(),
185 style: opts.style.as_str().to_string(),
186 target_tokens: opts.target_tokens,
187 estimated_tokens,
188 cache_status: match summary.cache_status {
189 SummaryCacheStatus::Hit => SummaryCacheStatusWire::Hit,
190 SummaryCacheStatus::Miss => SummaryCacheStatusWire::Miss,
191 },
192 summarizer_fallback: summary.fallback.map(|f| SummarizerFallbackInfo {
193 from: f.from,
194 reason: f.reason.to_string(),
195 }),
196 source_url: url.as_str().to_string(),
197 source_fetched_at: jiff::Timestamp::from_second(result.page.fetched_at)
198 .map(|t| t.to_string())
199 .unwrap_or_default(),
200 focus: opts.focus,
201 preserve: opts
202 .preserve
203 .iter()
204 .map(|p| p.as_str().to_string())
205 .collect(),
206 prompt_injection: assessment.telemetry,
207 },
208 })
209 }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `SummarizeArgs` field name must appear in the generated JSON
    /// schema, including `tokenizer` and `security`.
    #[test]
    fn schema_round_trips_required_fields() {
        let schema = schemars::schema_for!(SummarizeArgs);
        let json = serde_json::to_string(&schema).unwrap();
        for f in [
            "url",
            "target_tokens",
            "mode",
            "focus",
            "preserve",
            "style",
            "backend",
            "tokenizer",
            "security",
        ] {
            assert!(json.contains(f), "missing {f}");
        }
    }

    /// Each wire enum variant converts to the backend variant of the same name.
    /// All variants are checked so a swapped match arm cannot slip through.
    #[test]
    fn enum_mappings_round_trip() {
        assert_eq!(
            CompactMode::from(SummarizeMode::Extractive),
            CompactMode::Extractive,
        );
        assert_eq!(
            CompactMode::from(SummarizeMode::Abstractive),
            CompactMode::Abstractive,
        );
        assert_eq!(
            CompactMode::from(SummarizeMode::Headlines),
            CompactMode::Headlines,
        );

        assert_eq!(Style::from(SummarizeStyle::Bullet), Style::Bullet);
        assert_eq!(Style::from(SummarizeStyle::Prose), Style::Prose);
        assert_eq!(Style::from(SummarizeStyle::Executive), Style::Executive);

        assert_eq!(
            PreserveSection::from(SummarizePreserve::Code),
            PreserveSection::Code,
        );
        assert_eq!(
            PreserveSection::from(SummarizePreserve::Tables),
            PreserveSection::Tables,
        );
        assert_eq!(
            PreserveSection::from(SummarizePreserve::Quotes),
            PreserveSection::Quotes,
        );
        assert_eq!(
            PreserveSection::from(SummarizePreserve::Lists),
            PreserveSection::Lists,
        );
    }

    /// An unknown key is rejected, while the same payload without it parses.
    #[test]
    fn rejects_unknown_field() {
        // Positive control: without it, this test would also pass if the
        // minimal payload were rejected for some unrelated reason.
        let minimal: Result<SummarizeArgs, _> = serde_json::from_str(r#"{"url":"https://x/"}"#);
        assert!(minimal.is_ok(), "minimal args should deserialize");

        let r: Result<SummarizeArgs, _> = serde_json::from_str(r#"{"url":"https://x/","bogus":1}"#);
        assert!(r.is_err());
    }
}