Skip to main content

rover/mcp/
envelope.rs

1//! Wire-side envelope types returned to MCP clients.
2//!
3//! These are the JSON shapes Claude Code (or any other MCP client) sees.
4//! The `code` strings on [`RoverError`] are stable from M3 onward and will
5//! be documented in `docs/mcp-tools.md` (M8).
6
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9
10/// Status of a fetch response relative to the cache. Mirrors the three
11/// variants of [`crate::fetcher::cached::CacheStatus`]; M3 does not
12/// distinguish 304-revalidated from a fresh hit (M2 treats a 304 as a
13/// regular `Hit` after refreshing `expires_at`).
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
15#[serde(rename_all = "snake_case")]
16pub enum CacheStatus {
17    Hit,
18    Miss,
19    Stale,
20}
21
22impl From<crate::fetcher::cached::CacheStatus> for CacheStatus {
23    fn from(v: crate::fetcher::cached::CacheStatus) -> Self {
24        use crate::fetcher::cached::CacheStatus as C;
25        match v {
26            C::Hit => CacheStatus::Hit,
27            C::Miss => CacheStatus::Miss,
28            C::Stale { .. } => CacheStatus::Stale,
29        }
30    }
31}
32
33/// Where the token count came from on a `count_tokens` response.
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
35#[serde(rename_all = "snake_case")]
36pub enum CountSource {
37    Text,
38    Url,
39}
40
41/// Successful `fetch` response (full content).
42#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
43pub struct FetchResponse {
44    /// The full agent-facing document: a trusted preamble followed by the
45    /// nonce-wrapped frontmatter+body (see the prompt-injection guard). When
46    /// the guard's `wrap` method is allowlisted for the URL this is the
47    /// unwrapped frontmatter+body instead.
48    pub content: String,
49    pub cache_status: CacheStatus,
50
51    /// Present when `cache_status == "stale"` and a background revalidate
52    /// task was successfully queued. Agents can monitor or ignore.
53    #[serde(skip_serializing_if = "Option::is_none")]
54    pub revalidation: Option<StaleRevalidation>,
55
56    /// `true` when the agent supplied an explicit `summarize` arg and the
57    /// returned `markdown` is the summary, not the extracted body.
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub summarized: Option<bool>,
60
61    /// `true` when the extracted body exceeded `max_tokens` and Rover
62    /// auto-summarized to bring it within budget.
63    #[serde(skip_serializing_if = "Option::is_none")]
64    pub auto_summarized: Option<bool>,
65
66    /// Populated when whichever summarize path ran (`summarize` arg or the
67    /// auto path on `max_tokens`) fell back to an extractive backend.
68    #[serde(skip_serializing_if = "Option::is_none")]
69    pub summarizer_fallback: Option<SummarizerFallbackInfo>,
70}
71
72/// Single-count `count_tokens` or `fetch{count_only:true}` response.
73///
74/// This is the historical M2/M3 shape: one tokenization result over either
75/// inline text or a fetched URL's extracted markdown.
76#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
77pub struct CountSingleResponse {
78    pub tokens: usize,
79    pub tokenizer: String,
80    pub source: CountSource,
81
82    #[serde(skip_serializing_if = "Option::is_none")]
83    pub url: Option<String>,
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub content_hash: Option<String>,
86    #[serde(skip_serializing_if = "Option::is_none")]
87    pub fetched_at: Option<String>,
88    #[serde(skip_serializing_if = "Option::is_none")]
89    pub cache_status: Option<CacheStatus>,
90}
91
92/// Four token-count estimates returned in `mode = "estimates"`.
93///
94/// `raw_html` is `None` when `[cache] store_raw_html = false` (the default)
95/// or when the cached row has no `raw_html_zstd` blob.
96#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
97pub struct CountEstimates {
98    #[serde(skip_serializing_if = "Option::is_none")]
99    pub raw_html: Option<usize>,
100    pub extracted_md: usize,
101    pub summary_short: usize,
102    pub summary_medium: usize,
103}
104
105/// `count_tokens { mode: "estimates" }` response shape.
106#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
107pub struct CountEstimatesResponse {
108    pub url: String,
109    pub tokenizer: String,
110    pub estimates: CountEstimates,
111}
112
113/// `count_tokens` / `fetch{count_only:true}` response. Untagged so the
114/// historical single-count shape (still the default) remains
115/// wire-compatible; agents that opt into `mode = "estimates"` see the
116/// `CountEstimatesResponse` variant instead.
117///
118/// `JsonSchema` is implemented manually so the generated schema is rooted
119/// at `type: "object"` with a `oneOf` of the two variants — matching the
120/// pattern used by `FetchOutput` in `src/mcp/tools/fetch.rs`.
121#[derive(Debug, Clone, Serialize, Deserialize)]
122#[serde(untagged)]
123pub enum CountResponse {
124    Single(CountSingleResponse),
125    Estimates(CountEstimatesResponse),
126}
127
128impl JsonSchema for CountResponse {
129    fn schema_name() -> std::borrow::Cow<'static, str> {
130        "CountResponse".into()
131    }
132
133    fn schema_id() -> std::borrow::Cow<'static, str> {
134        concat!(module_path!(), "::CountResponse").into()
135    }
136
137    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
138        let single = generator.subschema_for::<CountSingleResponse>();
139        let estimates = generator.subschema_for::<CountEstimatesResponse>();
140        schemars::json_schema!({
141            "type": "object",
142            "oneOf": [single, estimates],
143        })
144    }
145}
146
147/// `get_metadata` response — structured metadata only, no markdown body.
148#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
149pub struct MetadataResponse {
150    #[serde(skip_serializing_if = "Option::is_none")]
151    pub title: Option<String>,
152    #[serde(skip_serializing_if = "Option::is_none")]
153    pub description: Option<String>,
154    #[serde(skip_serializing_if = "Option::is_none")]
155    pub author: Option<String>,
156    #[serde(skip_serializing_if = "Option::is_none")]
157    pub published: Option<String>,
158    #[serde(skip_serializing_if = "Option::is_none")]
159    pub modified: Option<String>,
160    #[serde(skip_serializing_if = "Option::is_none")]
161    pub image: Option<String>,
162    #[serde(skip_serializing_if = "Option::is_none")]
163    pub og_type: Option<String>,
164    #[serde(skip_serializing_if = "Option::is_none")]
165    pub canonical: Option<String>,
166    #[serde(skip_serializing_if = "Option::is_none")]
167    pub language: Option<String>,
168    pub schema_types: Vec<String>,
169    pub extraction_quality: f32,
170    pub url: String,
171    pub content_hash: String,
172    pub fetched_at: String,
173    pub cache_status: CacheStatus,
174
175    /// Guard telemetry for this response.
176    pub prompt_injection: crate::guard::GuardTelemetry,
177
178    /// Trusted warning surfaced when injection text was detected in the
179    /// metadata values (the structured equivalent of an in-band notice).
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub security_notice: Option<String>,
182}
183
184/// Stable error envelope returned over MCP. `code` is from the fixed set
185/// documented in the M3 design.
186#[derive(Debug, Clone, Serialize, Deserialize)]
187pub struct RoverError {
188    pub code: &'static str,
189    pub message: String,
190}
191
192impl RoverError {
193    pub const MAX_TOKENS_EXCEEDED: &'static str = "max_tokens_exceeded";
194    pub const INVALID_ARGS: &'static str = "invalid_args";
195    pub const INVALID_URL: &'static str = "invalid_url";
196    pub const SSRF_DENIED: &'static str = "ssrf_denied";
197    pub const FETCH_FAILED: &'static str = "fetch_failed";
198    pub const BOT_CHALLENGE: &'static str = "bot_challenge";
199    pub const EXTRACT_FAILED: &'static str = "extract_failed";
200    pub const STORAGE_ERROR: &'static str = "storage_error";
201    pub const TOKENIZER_UNAVAILABLE: &'static str = "tokenizer_unavailable";
202    pub const ROBOTS_DISALLOWED: &'static str = "robots_disallowed";
203    pub const ROBOTS_FETCH_FAILED: &'static str = "robots_fetch_failed";
204    pub const RETRY_EXHAUSTED: &'static str = "retry_exhausted";
205    pub const RATE_LIMITED: &'static str = "rate_limited";
206    pub const DEFERRED: &'static str = "deferred";
207    pub const TOO_MANY_URLS: &'static str = "too_many_urls";
208    pub const EMPTY_URL_LIST: &'static str = "empty_url_list";
209    pub const SUMMARIZER_NO_SUCH_BACKEND: &'static str = "summarizer_no_such_backend";
210    pub const SUMMARIZER_NO_EXTRACTIVE_FOR_FALLBACK: &'static str =
211        "summarizer_no_extractive_backend_for_fallback";
212    pub const SUMMARIZER_BACKEND_UNAVAILABLE: &'static str = "summarizer_backend_unavailable";
213    pub const SUMMARIZER_RATE_LIMITED: &'static str = "summarizer_rate_limited";
214    pub const SUMMARIZER_AUTH_FAILED: &'static str = "summarizer_auth_failed";
215    pub const SUMMARIZER_MODEL_ERROR: &'static str = "summarizer_model_error";
216    pub const SUMMARIZER_INVALID_REQUEST: &'static str = "summarizer_invalid_request";
217    pub const SUMMARIZER_LOCAL_FEATURE_NOT_COMPILED: &'static str =
218        "summarizer_local_feature_not_compiled";
219    pub const HEADLESS_FEATURE_NOT_COMPILED: &'static str = "headless_feature_not_compiled";
220    pub const HEADLESS_RENDERER_UNAVAILABLE: &'static str = "headless_renderer_unavailable";
221    pub const HEADLESS_LAUNCH_FAILED: &'static str = "headless_launch_failed";
222    pub const HEADLESS_RENDER_TIMEOUT: &'static str = "headless_render_timeout";
223    pub const HEADLESS_PAGE_CLOSED: &'static str = "headless_page_closed";
224    pub const HEADLESS_INTERNAL_ERROR: &'static str = "headless_internal_error";
225    pub const CAPTIONER_NO_SUCH: &'static str = "captioner_no_such";
226    pub const CAPTIONER_NOT_CONFIGURED: &'static str = "captioner_not_configured";
227    pub const CAPTIONER_LOCAL_FEATURE_NOT_COMPILED: &'static str =
228        "captioner_local_feature_not_compiled";
229    pub const CAPTIONER_RATE_LIMITED: &'static str = "captioner_rate_limited";
230    pub const CAPTIONER_AUTH_FAILED: &'static str = "captioner_auth_failed";
231    pub const CAPTIONER_BACKEND_UNAVAILABLE: &'static str = "captioner_backend_unavailable";
232    pub const CAPTIONER_MODEL_ERROR: &'static str = "captioner_model_error";
233    pub const CAPTIONER_IMAGE_DECODE_FAILED: &'static str = "captioner_image_decode_failed";
234
235    pub fn new(code: &'static str, message: impl Into<String>) -> Self {
236        Self {
237            code,
238            message: message.into(),
239        }
240    }
241}
242
243/// Returned by tools that schedule a background task.
244#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
245pub struct TaskCreatedResponse {
246    pub task_id: String,
247    pub status: String,
248    pub kind: String,
249    pub monitor_command: String,
250    pub poll_command: String,
251    pub cancel_command: String,
252    pub hint: String,
253}
254
255/// Stale-served envelope on a `fetch` response.
256#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
257pub struct StaleRevalidation {
258    pub task_id: String,
259    pub monitor_command: String,
260    pub poll_command: String,
261    pub hint: String,
262}
263
264/// `summarize` tool response.
265#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
266pub struct SummarizeResponse {
267    /// The agent-facing summary as a nonce-wrapped document (see the guard).
268    pub content: String,
269    pub metadata: SummarizeMetadata,
270}
271
272/// Wire-side metadata for a `summarize` response.
273#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
274pub struct SummarizeMetadata {
275    pub backend: String,
276    pub mode: String,
277    pub style: String,
278    #[serde(skip_serializing_if = "Option::is_none")]
279    pub target_tokens: Option<usize>,
280    pub estimated_tokens: usize,
281    pub cache_status: SummaryCacheStatusWire,
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub summarizer_fallback: Option<SummarizerFallbackInfo>,
284    pub source_url: String,
285    pub source_fetched_at: String,
286    #[serde(skip_serializing_if = "Option::is_none")]
287    pub focus: Option<String>,
288    pub preserve: Vec<String>,
289
290    /// Guard telemetry for this summary.
291    pub prompt_injection: crate::guard::GuardTelemetry,
292}
293
294/// Cache-status wire enum for the summary cache (distinct from the page
295/// cache's `CacheStatus` because the summary cache has no `Stale` variant).
296#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
297#[serde(rename_all = "snake_case")]
298pub enum SummaryCacheStatusWire {
299    Hit,
300    Miss,
301}
302
303/// Carried on the response when the requested backend failed and an
304/// extractive backend was used in its place.
305#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
306pub struct SummarizerFallbackInfo {
307    pub from: String,
308    pub reason: String,
309}
310
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Every error code declared on `RoverError`. Keep this list in sync when
    /// a constant is added; the uniqueness test below only protects codes
    /// that are listed here.
    const ALL_ERROR_CODES: &[&str] = &[
        RoverError::MAX_TOKENS_EXCEEDED,
        RoverError::INVALID_ARGS,
        RoverError::INVALID_URL,
        RoverError::SSRF_DENIED,
        RoverError::FETCH_FAILED,
        RoverError::BOT_CHALLENGE,
        RoverError::EXTRACT_FAILED,
        RoverError::STORAGE_ERROR,
        RoverError::TOKENIZER_UNAVAILABLE,
        RoverError::ROBOTS_DISALLOWED,
        RoverError::ROBOTS_FETCH_FAILED,
        RoverError::RETRY_EXHAUSTED,
        RoverError::RATE_LIMITED,
        RoverError::DEFERRED,
        RoverError::TOO_MANY_URLS,
        RoverError::EMPTY_URL_LIST,
        RoverError::SUMMARIZER_NO_SUCH_BACKEND,
        RoverError::SUMMARIZER_NO_EXTRACTIVE_FOR_FALLBACK,
        RoverError::SUMMARIZER_BACKEND_UNAVAILABLE,
        RoverError::SUMMARIZER_RATE_LIMITED,
        RoverError::SUMMARIZER_AUTH_FAILED,
        RoverError::SUMMARIZER_MODEL_ERROR,
        RoverError::SUMMARIZER_INVALID_REQUEST,
        RoverError::SUMMARIZER_LOCAL_FEATURE_NOT_COMPILED,
        RoverError::HEADLESS_FEATURE_NOT_COMPILED,
        RoverError::HEADLESS_RENDERER_UNAVAILABLE,
        RoverError::HEADLESS_LAUNCH_FAILED,
        RoverError::HEADLESS_RENDER_TIMEOUT,
        RoverError::HEADLESS_PAGE_CLOSED,
        RoverError::HEADLESS_INTERNAL_ERROR,
        RoverError::CAPTIONER_NO_SUCH,
        RoverError::CAPTIONER_NOT_CONFIGURED,
        RoverError::CAPTIONER_LOCAL_FEATURE_NOT_COMPILED,
        RoverError::CAPTIONER_RATE_LIMITED,
        RoverError::CAPTIONER_AUTH_FAILED,
        RoverError::CAPTIONER_BACKEND_UNAVAILABLE,
        RoverError::CAPTIONER_MODEL_ERROR,
        RoverError::CAPTIONER_IMAGE_DECODE_FAILED,
    ];

    /// The cache status must reach the wire in snake_case and the content
    /// must be passed through untouched.
    #[test]
    fn fetch_response_serialises_snake_case_cache_status() {
        let v = FetchResponse {
            content: "x".into(),
            cache_status: CacheStatus::Hit,
            revalidation: None,
            summarized: None,
            auto_summarized: None,
            summarizer_fallback: None,
        };
        let s = serde_json::to_string(&v).unwrap();
        assert!(s.contains("\"cache_status\":\"hit\""), "got: {s}");
        assert!(s.contains("\"content\":\"x\""), "got: {s}");
    }

    /// All four optional fields of the single-count shape are dropped from
    /// the JSON when they are `None`.
    #[test]
    fn count_response_omits_optional_fields() {
        let v = CountResponse::Single(CountSingleResponse {
            tokens: 7,
            tokenizer: "o200k".into(),
            source: CountSource::Text,
            url: None,
            content_hash: None,
            fetched_at: None,
            cache_status: None,
        });
        let s = serde_json::to_string(&v).unwrap();
        assert!(!s.contains("url"), "got: {s}");
        assert!(!s.contains("content_hash"), "got: {s}");
        assert!(!s.contains("fetched_at"), "got: {s}");
        assert!(!s.contains("cache_status"), "got: {s}");
    }

    /// The estimates variant serialises as the bare inner struct (untagged)
    /// and omits `raw_html` when it is `None`.
    #[test]
    fn count_response_estimates_serialises_as_estimates_shape() {
        let v = CountResponse::Estimates(CountEstimatesResponse {
            url: "https://example.com/p".into(),
            tokenizer: "o200k".into(),
            estimates: CountEstimates {
                raw_html: None,
                extracted_md: 123,
                summary_short: 45,
                summary_medium: 78,
            },
        });
        let s = serde_json::to_string(&v).unwrap();
        // Untagged: top-level keys are the inner struct's fields.
        assert!(s.contains("\"estimates\""), "got: {s}");
        assert!(s.contains("\"extracted_md\":123"), "got: {s}");
        // raw_html=None is omitted by skip_serializing_if.
        assert!(!s.contains("raw_html"), "got: {s}");
    }

    /// No two error constants may share a wire value, otherwise clients
    /// could not tell the failures apart. Checks every declared code.
    #[test]
    fn rover_error_codes_are_stable_constants() {
        let mut seen = HashSet::with_capacity(ALL_ERROR_CODES.len());
        for code in ALL_ERROR_CODES {
            assert!(!code.is_empty(), "empty error code");
            // `insert` returns false when the value was already present.
            assert!(seen.insert(*code), "duplicate code: {code}");
        }
    }
}