1use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
15#[serde(rename_all = "snake_case")]
16pub enum CacheStatus {
17 Hit,
18 Miss,
19 Stale,
20}
21
22impl From<crate::fetcher::cached::CacheStatus> for CacheStatus {
23 fn from(v: crate::fetcher::cached::CacheStatus) -> Self {
24 use crate::fetcher::cached::CacheStatus as C;
25 match v {
26 C::Hit => CacheStatus::Hit,
27 C::Miss => CacheStatus::Miss,
28 C::Stale { .. } => CacheStatus::Stale,
29 }
30 }
31}
32
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
35#[serde(rename_all = "snake_case")]
36pub enum CountSource {
37 Text,
38 Url,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
43pub struct FetchResponse {
44 pub content: String,
49 pub cache_status: CacheStatus,
50
51 #[serde(skip_serializing_if = "Option::is_none")]
54 pub revalidation: Option<StaleRevalidation>,
55
56 #[serde(skip_serializing_if = "Option::is_none")]
59 pub summarized: Option<bool>,
60
61 #[serde(skip_serializing_if = "Option::is_none")]
64 pub auto_summarized: Option<bool>,
65
66 #[serde(skip_serializing_if = "Option::is_none")]
69 pub summarizer_fallback: Option<SummarizerFallbackInfo>,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
77pub struct CountSingleResponse {
78 pub tokens: usize,
79 pub tokenizer: String,
80 pub source: CountSource,
81
82 #[serde(skip_serializing_if = "Option::is_none")]
83 pub url: Option<String>,
84 #[serde(skip_serializing_if = "Option::is_none")]
85 pub content_hash: Option<String>,
86 #[serde(skip_serializing_if = "Option::is_none")]
87 pub fetched_at: Option<String>,
88 #[serde(skip_serializing_if = "Option::is_none")]
89 pub cache_status: Option<CacheStatus>,
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
97pub struct CountEstimates {
98 #[serde(skip_serializing_if = "Option::is_none")]
99 pub raw_html: Option<usize>,
100 pub extracted_md: usize,
101 pub summary_short: usize,
102 pub summary_medium: usize,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
107pub struct CountEstimatesResponse {
108 pub url: String,
109 pub tokenizer: String,
110 pub estimates: CountEstimates,
111}
112
113#[derive(Debug, Clone, Serialize, Deserialize)]
122#[serde(untagged)]
123pub enum CountResponse {
124 Single(CountSingleResponse),
125 Estimates(CountEstimatesResponse),
126}
127
128impl JsonSchema for CountResponse {
129 fn schema_name() -> std::borrow::Cow<'static, str> {
130 "CountResponse".into()
131 }
132
133 fn schema_id() -> std::borrow::Cow<'static, str> {
134 concat!(module_path!(), "::CountResponse").into()
135 }
136
137 fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
138 let single = generator.subschema_for::<CountSingleResponse>();
139 let estimates = generator.subschema_for::<CountEstimatesResponse>();
140 schemars::json_schema!({
141 "type": "object",
142 "oneOf": [single, estimates],
143 })
144 }
145}
146
147#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
149pub struct MetadataResponse {
150 #[serde(skip_serializing_if = "Option::is_none")]
151 pub title: Option<String>,
152 #[serde(skip_serializing_if = "Option::is_none")]
153 pub description: Option<String>,
154 #[serde(skip_serializing_if = "Option::is_none")]
155 pub author: Option<String>,
156 #[serde(skip_serializing_if = "Option::is_none")]
157 pub published: Option<String>,
158 #[serde(skip_serializing_if = "Option::is_none")]
159 pub modified: Option<String>,
160 #[serde(skip_serializing_if = "Option::is_none")]
161 pub image: Option<String>,
162 #[serde(skip_serializing_if = "Option::is_none")]
163 pub og_type: Option<String>,
164 #[serde(skip_serializing_if = "Option::is_none")]
165 pub canonical: Option<String>,
166 #[serde(skip_serializing_if = "Option::is_none")]
167 pub language: Option<String>,
168 pub schema_types: Vec<String>,
169 pub extraction_quality: f32,
170 pub url: String,
171 pub content_hash: String,
172 pub fetched_at: String,
173 pub cache_status: CacheStatus,
174
175 pub prompt_injection: crate::guard::GuardTelemetry,
177
178 #[serde(skip_serializing_if = "Option::is_none")]
181 pub security_notice: Option<String>,
182}
183
184#[derive(Debug, Clone, Serialize, Deserialize)]
187pub struct RoverError {
188 pub code: &'static str,
189 pub message: String,
190}
191
192impl RoverError {
193 pub const MAX_TOKENS_EXCEEDED: &'static str = "max_tokens_exceeded";
194 pub const INVALID_ARGS: &'static str = "invalid_args";
195 pub const INVALID_URL: &'static str = "invalid_url";
196 pub const SSRF_DENIED: &'static str = "ssrf_denied";
197 pub const FETCH_FAILED: &'static str = "fetch_failed";
198 pub const BOT_CHALLENGE: &'static str = "bot_challenge";
199 pub const EXTRACT_FAILED: &'static str = "extract_failed";
200 pub const STORAGE_ERROR: &'static str = "storage_error";
201 pub const TOKENIZER_UNAVAILABLE: &'static str = "tokenizer_unavailable";
202 pub const ROBOTS_DISALLOWED: &'static str = "robots_disallowed";
203 pub const ROBOTS_FETCH_FAILED: &'static str = "robots_fetch_failed";
204 pub const RETRY_EXHAUSTED: &'static str = "retry_exhausted";
205 pub const RATE_LIMITED: &'static str = "rate_limited";
206 pub const DEFERRED: &'static str = "deferred";
207 pub const TOO_MANY_URLS: &'static str = "too_many_urls";
208 pub const EMPTY_URL_LIST: &'static str = "empty_url_list";
209 pub const SUMMARIZER_NO_SUCH_BACKEND: &'static str = "summarizer_no_such_backend";
210 pub const SUMMARIZER_NO_EXTRACTIVE_FOR_FALLBACK: &'static str =
211 "summarizer_no_extractive_backend_for_fallback";
212 pub const SUMMARIZER_BACKEND_UNAVAILABLE: &'static str = "summarizer_backend_unavailable";
213 pub const SUMMARIZER_RATE_LIMITED: &'static str = "summarizer_rate_limited";
214 pub const SUMMARIZER_AUTH_FAILED: &'static str = "summarizer_auth_failed";
215 pub const SUMMARIZER_MODEL_ERROR: &'static str = "summarizer_model_error";
216 pub const SUMMARIZER_INVALID_REQUEST: &'static str = "summarizer_invalid_request";
217 pub const SUMMARIZER_LOCAL_FEATURE_NOT_COMPILED: &'static str =
218 "summarizer_local_feature_not_compiled";
219 pub const HEADLESS_FEATURE_NOT_COMPILED: &'static str = "headless_feature_not_compiled";
220 pub const HEADLESS_RENDERER_UNAVAILABLE: &'static str = "headless_renderer_unavailable";
221 pub const HEADLESS_LAUNCH_FAILED: &'static str = "headless_launch_failed";
222 pub const HEADLESS_RENDER_TIMEOUT: &'static str = "headless_render_timeout";
223 pub const HEADLESS_PAGE_CLOSED: &'static str = "headless_page_closed";
224 pub const HEADLESS_INTERNAL_ERROR: &'static str = "headless_internal_error";
225 pub const CAPTIONER_NO_SUCH: &'static str = "captioner_no_such";
226 pub const CAPTIONER_NOT_CONFIGURED: &'static str = "captioner_not_configured";
227 pub const CAPTIONER_LOCAL_FEATURE_NOT_COMPILED: &'static str =
228 "captioner_local_feature_not_compiled";
229 pub const CAPTIONER_RATE_LIMITED: &'static str = "captioner_rate_limited";
230 pub const CAPTIONER_AUTH_FAILED: &'static str = "captioner_auth_failed";
231 pub const CAPTIONER_BACKEND_UNAVAILABLE: &'static str = "captioner_backend_unavailable";
232 pub const CAPTIONER_MODEL_ERROR: &'static str = "captioner_model_error";
233 pub const CAPTIONER_IMAGE_DECODE_FAILED: &'static str = "captioner_image_decode_failed";
234
235 pub fn new(code: &'static str, message: impl Into<String>) -> Self {
236 Self {
237 code,
238 message: message.into(),
239 }
240 }
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
245pub struct TaskCreatedResponse {
246 pub task_id: String,
247 pub status: String,
248 pub kind: String,
249 pub monitor_command: String,
250 pub poll_command: String,
251 pub cancel_command: String,
252 pub hint: String,
253}
254
255#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
257pub struct StaleRevalidation {
258 pub task_id: String,
259 pub monitor_command: String,
260 pub poll_command: String,
261 pub hint: String,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
266pub struct SummarizeResponse {
267 pub content: String,
269 pub metadata: SummarizeMetadata,
270}
271
272#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
274pub struct SummarizeMetadata {
275 pub backend: String,
276 pub mode: String,
277 pub style: String,
278 #[serde(skip_serializing_if = "Option::is_none")]
279 pub target_tokens: Option<usize>,
280 pub estimated_tokens: usize,
281 pub cache_status: SummaryCacheStatusWire,
282 #[serde(skip_serializing_if = "Option::is_none")]
283 pub summarizer_fallback: Option<SummarizerFallbackInfo>,
284 pub source_url: String,
285 pub source_fetched_at: String,
286 #[serde(skip_serializing_if = "Option::is_none")]
287 pub focus: Option<String>,
288 pub preserve: Vec<String>,
289
290 pub prompt_injection: crate::guard::GuardTelemetry,
292}
293
294#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
297#[serde(rename_all = "snake_case")]
298pub enum SummaryCacheStatusWire {
299 Hit,
300 Miss,
301}
302
303#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
306pub struct SummarizerFallbackInfo {
307 pub from: String,
308 pub reason: String,
309}
310
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// `cache_status` must be serialized in snake_case next to `content`.
    #[test]
    fn fetch_response_serialises_snake_case_cache_status() {
        let v = FetchResponse {
            content: "x".into(),
            cache_status: CacheStatus::Hit,
            revalidation: None,
            summarized: None,
            auto_summarized: None,
            summarizer_fallback: None,
        };
        let s = serde_json::to_string(&v).unwrap();
        assert!(s.contains("\"cache_status\":\"hit\""), "got: {s}");
        assert!(s.contains("\"content\":\"x\""), "got: {s}");
    }

    /// Every `Option` field of `CountSingleResponse` must vanish from the
    /// output when it is `None`.
    #[test]
    fn count_response_omits_optional_fields() {
        let v = CountResponse::Single(CountSingleResponse {
            tokens: 7,
            tokenizer: "o200k".into(),
            source: CountSource::Text,
            url: None,
            content_hash: None,
            fetched_at: None,
            cache_status: None,
        });
        let s = serde_json::to_string(&v).unwrap();
        // All four optional keys are checked, including `fetched_at`.
        for key in ["url", "content_hash", "fetched_at", "cache_status"] {
            assert!(!s.contains(key), "`{key}` should be omitted, got: {s}");
        }
    }

    /// The `Estimates` variant serializes as the bare estimates struct and
    /// drops `raw_html` when it is `None`.
    #[test]
    fn count_response_estimates_serialises_as_estimates_shape() {
        let v = CountResponse::Estimates(CountEstimatesResponse {
            url: "https://example.com/p".into(),
            tokenizer: "o200k".into(),
            estimates: CountEstimates {
                raw_html: None,
                extracted_md: 123,
                summary_short: 45,
                summary_medium: 78,
            },
        });
        let s = serde_json::to_string(&v).unwrap();
        assert!(s.contains("\"estimates\""), "got: {s}");
        assert!(s.contains("\"extracted_md\":123"), "got: {s}");
        assert!(!s.contains("raw_html"), "got: {s}");
    }

    /// No two `RoverError` code constants may share a value.
    ///
    /// The list names every constant declared on `RoverError`; a newly added
    /// code must be appended here to be covered by the check.
    #[test]
    fn rover_error_codes_are_stable_constants() {
        let codes: &[&'static str] = &[
            RoverError::MAX_TOKENS_EXCEEDED,
            RoverError::INVALID_ARGS,
            RoverError::INVALID_URL,
            RoverError::SSRF_DENIED,
            RoverError::FETCH_FAILED,
            RoverError::BOT_CHALLENGE,
            RoverError::EXTRACT_FAILED,
            RoverError::STORAGE_ERROR,
            RoverError::TOKENIZER_UNAVAILABLE,
            RoverError::ROBOTS_DISALLOWED,
            RoverError::ROBOTS_FETCH_FAILED,
            RoverError::RETRY_EXHAUSTED,
            RoverError::RATE_LIMITED,
            RoverError::DEFERRED,
            RoverError::TOO_MANY_URLS,
            RoverError::EMPTY_URL_LIST,
            RoverError::SUMMARIZER_NO_SUCH_BACKEND,
            RoverError::SUMMARIZER_NO_EXTRACTIVE_FOR_FALLBACK,
            RoverError::SUMMARIZER_BACKEND_UNAVAILABLE,
            RoverError::SUMMARIZER_RATE_LIMITED,
            RoverError::SUMMARIZER_AUTH_FAILED,
            RoverError::SUMMARIZER_MODEL_ERROR,
            RoverError::SUMMARIZER_INVALID_REQUEST,
            RoverError::SUMMARIZER_LOCAL_FEATURE_NOT_COMPILED,
            RoverError::HEADLESS_FEATURE_NOT_COMPILED,
            RoverError::HEADLESS_RENDERER_UNAVAILABLE,
            RoverError::HEADLESS_LAUNCH_FAILED,
            RoverError::HEADLESS_RENDER_TIMEOUT,
            RoverError::HEADLESS_PAGE_CLOSED,
            RoverError::HEADLESS_INTERNAL_ERROR,
            RoverError::CAPTIONER_NO_SUCH,
            RoverError::CAPTIONER_NOT_CONFIGURED,
            RoverError::CAPTIONER_LOCAL_FEATURE_NOT_COMPILED,
            RoverError::CAPTIONER_RATE_LIMITED,
            RoverError::CAPTIONER_AUTH_FAILED,
            RoverError::CAPTIONER_BACKEND_UNAVAILABLE,
            RoverError::CAPTIONER_MODEL_ERROR,
            RoverError::CAPTIONER_IMAGE_DECODE_FAILED,
        ];

        // `insert` returns false on a repeated value, so one pass suffices.
        let mut seen = HashSet::with_capacity(codes.len());
        for code in codes {
            assert!(seen.insert(*code), "duplicate code: {code}");
        }
    }
}