Skip to main content

lean_ctx/tools/registered/
ctx_url_read.rs

1use rmcp::model::Tool;
2use rmcp::ErrorData;
3use serde_json::{json, Map, Value};
4
5use crate::core::protocol::append_savings;
6use crate::core::tokens::count_tokens;
7use crate::core::web::{self, ReadMode, ReadOptions};
8use crate::server::tool_trait::{get_int, get_str, McpTool, ToolContext, ToolOutput};
9use crate::tool_defs::tool_def;
10
/// `ctx_url_read` — fetch a web page, PDF, or YouTube video and return
/// compressed, citation-backed context (HTML/PDF→text, transcript flattening,
/// extractive research-compression modes).
///
/// This is a field-less marker type: it carries no state of its own, so it is
/// trivially copyable and default-constructible. The common traits are derived
/// so the tool can be logged, stored in registries, or embedded in other
/// `Debug`/`Clone` types without extra boilerplate.
#[derive(Debug, Clone, Copy, Default)]
pub struct CtxUrlReadTool;
15
16impl McpTool for CtxUrlReadTool {
17    fn name(&self) -> &'static str {
18        "ctx_url_read"
19    }
20
21    fn tool_def(&self) -> Tool {
22        tool_def(
23            "ctx_url_read",
24            "Fetch a web page, PDF, or YouTube URL as compressed, cited context.\n\
25             HTML/PDF→clean text, YouTube→transcript; modes: auto|markdown|text|links|facts|quotes|transcript.\n\
26             facts/quotes return claims with confidence + source. SSRF-guarded (http/https only, blocks private/loopback).\n\
27             Use for research/crawl instead of raw fetch.",
28            json!({
29                "type": "object",
30                "properties": {
31                    "url": { "type": "string", "description": "http(s) URL of a page or YouTube video" },
32                    "mode": {
33                        "type": "string",
34                        "enum": ["auto", "markdown", "text", "links", "facts", "quotes", "transcript"],
35                        "description": "Distillation mode (default: auto — Markdown for pages, transcript for videos)"
36                    },
37                    "query": { "type": "string", "description": "Optional focus query; boosts relevance in facts/quotes modes" },
38                    "max_tokens": { "type": "integer", "description": "Token budget for returned content (default: 6000)" },
39                    "max_items": { "type": "integer", "description": "Max items for facts/quotes modes (default: 12)" },
40                    "timeout_secs": { "type": "integer", "description": "Request timeout in seconds (default: 20, max: 60)" }
41                },
42                "required": ["url"]
43            }),
44        )
45    }
46
47    fn handle(
48        &self,
49        args: &Map<String, Value>,
50        _ctx: &ToolContext,
51    ) -> Result<ToolOutput, ErrorData> {
52        let url = get_str(args, "url")
53            .ok_or_else(|| ErrorData::invalid_params("url is required", None))?;
54
55        let mode = match get_str(args, "mode") {
56            Some(m) => ReadMode::parse(&m).ok_or_else(|| {
57                ErrorData::invalid_params(
58                    format!("invalid mode '{m}' (use: auto, markdown, text, links, facts, quotes, transcript)"),
59                    None,
60                )
61            })?,
62            None => ReadMode::Auto,
63        };
64
65        let query = get_str(args, "query");
66        let max_tokens = get_int(args, "max_tokens")
67            .map_or(web::DEFAULT_MAX_TOKENS, |n| n.clamp(200, 50_000) as usize);
68        let max_items =
69            get_int(args, "max_items").map_or(web::DEFAULT_MAX_ITEMS, |n| n.clamp(1, 100) as usize);
70        let timeout_secs = get_int(args, "timeout_secs")
71            .map_or(web::fetch::DEFAULT_TIMEOUT_SECS, |n| n.clamp(1, 60) as u64);
72
73        let opts = ReadOptions {
74            url: &url,
75            mode,
76            query: query.as_deref(),
77            max_tokens,
78            max_items,
79            timeout_secs,
80        };
81
82        let result = tokio::task::block_in_place(|| web::read_url(&opts));
83
84        match result {
85            Ok(read) => {
86                let sent = count_tokens(&read.content);
87                let saved = read.original_tokens.saturating_sub(sent);
88                let text = append_savings(&read.content, read.original_tokens, sent);
89                Ok(ToolOutput {
90                    text,
91                    original_tokens: read.original_tokens,
92                    saved_tokens: saved,
93                    mode: Some(read.mode.label().to_string()),
94                    path: Some(read.final_url),
95                    changed: false,
96                })
97            }
98            Err(e) => Err(ErrorData::invalid_params(
99                format!("ctx_url_read failed: {e}"),
100                None,
101            )),
102        }
103    }
104}