Skip to main content

crates_docs/tools/docs/
lookup.rs

1//! Lookup crate documentation tool
2#![allow(clippy::no_effect_replace)]
3#![allow(missing_docs)]
4
5use crate::tools::docs::DocService;
6use crate::tools::Tool;
7use async_trait::async_trait;
8use rust_mcp_sdk::schema::CallToolError;
9use serde::{Deserialize, Serialize};
10use std::sync::Arc;
11
/// Arguments accepted by the `lookup_crate` tool.
///
/// The `mcp_tool` attribute below carries the tool metadata (name, title,
/// description, hints, icons); the fields are the JSON parameters that
/// `execute` deserializes from the incoming request.
#[rust_mcp_sdk::macros::mcp_tool(
    name = "lookup_crate",
    title = "Lookup Crate Documentation",
    description = "Get complete documentation for a Rust crate from docs.rs. Returns the main documentation page content, including modules, structs, functions, etc. Suitable for understanding the overall functionality and usage of a crate.",
    destructive_hint = false,
    idempotent_hint = true,
    open_world_hint = false,
    read_only_hint = true,
    execution(task_support = "optional"),
    icons = [
        (src = "https://docs.rs/favicon.ico", mime_type = "image/x-icon", sizes = ["32x32"], theme = "light"),
        (src = "https://docs.rs/favicon.ico", mime_type = "image/x-icon", sizes = ["32x32"], theme = "dark")
    ]
)]
#[derive(Debug, Clone, Deserialize, Serialize, rust_mcp_sdk::macros::JsonSchema)]
pub struct LookupCrateTool {
    /// Name of the crate to look up (required).
    #[json_schema(
        title = "Crate 名称",
        description = "要查找的 Crate name,例如:serde、tokio、reqwest"
    )]
    pub crate_name: String,

    /// Crate version to look up; when `None` the version segment is left out
    /// of the docs.rs URL.
    #[json_schema(
        title = "版本号",
        description = "Specify crate version, e.g.: 1.0.0. Uses latest version if not specified"
    )]
    pub version: Option<String>,

    /// Requested output format; `execute` falls back to `markdown` when this
    /// is `None`.
    #[json_schema(
        title = "输出格式",
        description = "Documentation output format: markdown (default), text (plain text), html",
        default = "markdown"
    )]
    pub format: Option<String>,
}
51
/// Implementation of the `lookup_crate` tool.
pub struct LookupCrateToolImpl {
    /// Shared documentation service; supplies the HTTP client and the
    /// document cache this tool uses.
    service: Arc<DocService>,
}
56
57impl LookupCrateToolImpl {
58    /// Create a new lookup tool instance
59    #[must_use]
60    pub fn new(service: Arc<DocService>) -> Self {
61        Self { service }
62    }
63
64    /// Get crate documentation
65    async fn fetch_crate_docs(
66        &self,
67        crate_name: &str,
68        version: Option<&str>,
69    ) -> std::result::Result<String, CallToolError> {
70        // Try to get from cache
71        if let Some(cached) = self
72            .service
73            .doc_cache()
74            .get_crate_docs(crate_name, version)
75            .await
76        {
77            return Ok(cached);
78        }
79
80        // Build URL
81        let url = if let Some(ver) = version {
82            format!("https://docs.rs/{crate_name}/{ver}/")
83        } else {
84            format!("https://docs.rs/{crate_name}/")
85        };
86
87        // Send HTTP request (reusing DocService client)
88        let response = self
89            .service
90            .client()
91            .get(&url)
92            .send()
93            .await
94            .map_err(|e| CallToolError::from_message(format!("HTTP request failed: {e}")))?;
95
96        if !response.status().is_success() {
97            return Err(CallToolError::from_message(format!(
98                "Failed to get documentation: HTTP {} - {}",
99                response.status(),
100                response.text().await.unwrap_or_default()
101            )));
102        }
103
104        let html = response
105            .text()
106            .await
107            .map_err(|e| CallToolError::from_message(format!("读取响应失败: {e}")))?;
108
109        // 提取文档内容
110        let docs = extract_documentation(&html);
111
112        // 缓存结果
113        self.service
114            .doc_cache()
115            .set_crate_docs(crate_name, version, docs.clone())
116            .await;
117
118        Ok(docs)
119    }
120
121    /// 获取原始 HTML 文档(用于 text 格式)
122    async fn fetch_raw_html(
123        &self,
124        crate_name: &str,
125        version: Option<&str>,
126    ) -> std::result::Result<String, CallToolError> {
127        // Build URL
128        let url = if let Some(ver) = version {
129            format!("https://docs.rs/{crate_name}/{ver}/")
130        } else {
131            format!("https://docs.rs/{crate_name}/")
132        };
133
134        // Send HTTP request (reusing DocService client)
135        let response = self
136            .service
137            .client()
138            .get(&url)
139            .send()
140            .await
141            .map_err(|e| CallToolError::from_message(format!("HTTP request failed: {e}")))?;
142
143        if !response.status().is_success() {
144            return Err(CallToolError::from_message(format!(
145                "Failed to get documentation: HTTP {} - {}",
146                response.status(),
147                response.text().await.unwrap_or_default()
148            )));
149        }
150
151        let html = response
152            .text()
153            .await
154            .map_err(|e| CallToolError::from_message(format!("读取响应失败: {e}")))?;
155
156        Ok(html)
157    }
158}
159
160/// 从 HTML 中提取文档内容
161fn extract_documentation(html: &str) -> String {
162    // 先清理 HTML(移除 script, style, noscript 等标签及内容)
163    let cleaned_html = clean_html(html);
164    // 使用 html2md 库将清理后的 HTML 转换为 Markdown
165    html2md::parse_html(&cleaned_html)
166}
167
/// Removes `<script>`, `<style>`, `<noscript>` and `<iframe>` elements (their
/// tags and everything between them) from `html`; all other input is copied
/// through unchanged.
///
/// Behavior worth knowing:
/// * Tag names are matched ASCII case-insensitively.
/// * While inside one of the skipped elements the content is treated as opaque
///   text: only the matching closing tag ends the skip. Code such as
///   `if (a < b)` or a string literal containing `<style>` therefore cannot
///   hide the closing tag or open a second skip level.
/// * A stray closing tag of a skipped element is dropped.
/// * An unterminated skipped element swallows the rest of the input.
/// * A `<` that cannot start a tag (not followed by an ASCII letter, `/`, `!`
///   or `?`) is copied as plain text.
fn clean_html(html: &str) -> String {
    const SKIPPED_ELEMENTS: [&str; 4] = ["script", "style", "noscript", "iframe"];

    let mut cleaned = String::with_capacity(html.len());
    // Name of the skipped element we are currently inside, if any.
    let mut open_element: Option<&'static str> = None;
    let mut rest = html;

    while let Some(lt) = rest.find('<') {
        let tag = &rest[lt..];
        let after_lt = &tag[1..];

        // Tag name: everything after '<' up to the first '>' or whitespace.
        let name_len = after_lt
            .find(|c: char| c == '>' || c.is_whitespace())
            .unwrap_or(after_lt.len());
        let raw_name = &after_lt[..name_len];
        let is_closing = raw_name.starts_with('/');
        let bare_name = raw_name.trim_start_matches('/');
        // Length of the whole tag including '>', or the remaining input when
        // the tag is never terminated.
        let tag_len = tag.find('>').map_or(tag.len(), |gt| gt + 1);

        if let Some(element) = open_element {
            if is_closing && bare_name.eq_ignore_ascii_case(element) {
                open_element = None;
                rest = &tag[tag_len..];
            } else {
                // Not markup: step past this '<' only, so a closing tag that
                // follows shortly after is still seen.
                rest = after_lt;
            }
            continue;
        }

        cleaned.push_str(&rest[..lt]);

        let starts_tag = after_lt
            .starts_with(|c: char| c.is_ascii_alphabetic() || matches!(c, '/' | '!' | '?'));
        if !starts_tag {
            cleaned.push('<');
            rest = after_lt;
            continue;
        }

        let skipped = SKIPPED_ELEMENTS
            .iter()
            .copied()
            .find(|name| bare_name.eq_ignore_ascii_case(name));
        match skipped {
            // Stray closing tag of a skipped element: drop it.
            Some(_) if is_closing => {}
            Some(element) => open_element = Some(element),
            None => cleaned.push_str(&tag[..tag_len]),
        }
        rest = &tag[tag_len..];
    }

    if open_element.is_none() {
        cleaned.push_str(rest);
    }
    cleaned
}
253
/// Converts HTML to plain text.
///
/// * Every tag and comment is replaced by a single space.
/// * The content of `<script>`, `<style>`, `<noscript>` and `<iframe>`
///   elements is dropped; only the matching closing tag ends the skip, and
///   nothing inside (tags, entities, stray `<`) is interpreted.
/// * The named entities `lt`, `gt`, `amp`, `quot`, `apos`, `nbsp` and numeric
///   character references (`&#39;`, `&#x27;`) are decoded. Anything else that
///   starts with `&` is kept as written, so a bare ampersand never swallows
///   the text that follows it.
/// * A `<` that cannot start a tag is kept as text.
/// * Runs of whitespace are collapsed to one space and the result is trimmed.
fn html_to_text(html: &str) -> String {
    const SKIPPED_ELEMENTS: [&str; 4] = ["script", "style", "noscript", "iframe"];

    let mut text = String::with_capacity(html.len());
    // Name of the skipped element we are currently inside, if any.
    let mut open_element: Option<&'static str> = None;
    let mut rest = html;

    while !rest.is_empty() {
        if let Some(element) = open_element {
            match rest.find('<') {
                // Unterminated skipped element: the remaining input is dropped.
                None => rest = "",
                Some(lt) => {
                    let tail = &rest[lt..];
                    let (raw_name, tag_len) = split_html_tag(tail);
                    let closes_element = raw_name.starts_with('/')
                        && raw_name
                            .trim_start_matches('/')
                            .eq_ignore_ascii_case(element);
                    if closes_element {
                        open_element = None;
                        text.push(' ');
                        rest = &tail[tag_len..];
                    } else {
                        // Opaque content: step past this '<' only.
                        rest = &tail[1..];
                    }
                }
            }
            continue;
        }

        let pos = match rest.find(|c: char| c == '<' || c == '&') {
            Some(pos) => pos,
            None => {
                text.push_str(rest);
                break;
            }
        };
        text.push_str(&rest[..pos]);
        let tail = &rest[pos..];

        if tail.starts_with('&') {
            match decode_html_entity(tail) {
                Some((decoded, consumed)) => {
                    text.push(decoded);
                    rest = &tail[consumed..];
                }
                None => {
                    text.push('&');
                    rest = &tail[1..];
                }
            }
            continue;
        }

        if tail.starts_with("<!--") {
            // A comment may contain '>', so look for its real terminator.
            let comment_len = tail.find("-->").map_or(tail.len(), |end| end + 3);
            text.push(' ');
            rest = &tail[comment_len..];
            continue;
        }

        let after_lt = &tail[1..];
        let starts_tag = after_lt
            .starts_with(|c: char| c.is_ascii_alphabetic() || matches!(c, '/' | '!' | '?'));
        if !starts_tag {
            text.push('<');
            rest = after_lt;
            continue;
        }

        let (raw_name, tag_len) = split_html_tag(tail);
        rest = &tail[tag_len..];
        if !raw_name.starts_with('/') {
            open_element = SKIPPED_ELEMENTS
                .iter()
                .copied()
                .find(|name| raw_name.eq_ignore_ascii_case(name));
        }
        // Separate the text on either side of the tag; surplus spaces are
        // collapsed at the end.
        if open_element.is_none() {
            text.push(' ');
        }
    }

    clean_whitespace(&text)
}

/// Splits the tag at the start of `tail` (which must begin with `<`) into its
/// raw name, including a leading `/` on closing tags, and its total byte
/// length including the terminating `>`. An unterminated tag extends to the
/// end of `tail`.
fn split_html_tag(tail: &str) -> (&str, usize) {
    let after_lt = &tail[1..];
    let name_len = after_lt
        .find(|c: char| c == '>' || c.is_whitespace())
        .unwrap_or(after_lt.len());
    let tag_len = tail.find('>').map_or(tail.len(), |gt| gt + 1);
    (&after_lt[..name_len], tag_len)
}

/// Decodes the character reference at the start of `tail` (which must begin
/// with `&`), returning the decoded character and the number of bytes consumed.
///
/// Returns `None` when `tail` does not start with a supported, `;`-terminated
/// reference, so the caller can emit the `&` literally.
fn decode_html_entity(tail: &str) -> Option<(char, usize)> {
    // Longest reference body we are willing to scan; bounds the work per '&'.
    const MAX_ENTITY_LEN: usize = 32;

    let body = &tail[1..];
    let name_len = body
        .bytes()
        .take(MAX_ENTITY_LEN)
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'#')
        .count();
    if body.as_bytes().get(name_len) != Some(&b';') {
        return None;
    }

    let name = &body[..name_len];
    let decoded = match name {
        "lt" => '<',
        "gt" => '>',
        "amp" => '&',
        "quot" => '"',
        "apos" => '\'',
        "nbsp" => ' ',
        _ => {
            let digits = name.strip_prefix('#')?;
            let code = match digits.strip_prefix(|c: char| c == 'x' || c == 'X') {
                Some(hex) => u32::from_str_radix(hex, 16).ok()?,
                None => digits.parse::<u32>().ok()?,
            };
            char::from_u32(code)?
        }
    };
    // '&' + name + ';'
    Some((decoded, name_len + 2))
}

/// Collapses every run of whitespace in `text` into a single space and strips
/// leading and trailing whitespace.
fn clean_whitespace(text: &str) -> String {
    text.split_whitespace().collect::<Vec<_>>().join(" ")
}
350
351#[async_trait]
352impl Tool for LookupCrateToolImpl {
353    fn definition(&self) -> rust_mcp_sdk::schema::Tool {
354        LookupCrateTool::tool()
355    }
356
357    async fn execute(
358        &self,
359        arguments: serde_json::Value,
360    ) -> std::result::Result<
361        rust_mcp_sdk::schema::CallToolResult,
362        rust_mcp_sdk::schema::CallToolError,
363    > {
364        let params: LookupCrateTool = serde_json::from_value(arguments).map_err(|e| {
365            rust_mcp_sdk::schema::CallToolError::invalid_arguments(
366                "lookup_crate",
367                Some(format!("参数解析失败: {e}")),
368            )
369        })?;
370
371        let format = params.format.unwrap_or_else(|| "markdown".to_string());
372        let content = match format.as_str() {
373            "text" => {
374                // 获取原始 HTML 并转换为纯文本
375                let html = self
376                    .fetch_raw_html(&params.crate_name, params.version.as_deref())
377                    .await?;
378                html_to_text(&html)
379            }
380            _ => {
381                // "markdown" 和其他格式都返回原始文档
382                self.fetch_crate_docs(&params.crate_name, params.version.as_deref())
383                    .await?
384            }
385        };
386
387        Ok(rust_mcp_sdk::schema::CallToolResult::text_content(vec![
388            content.into(),
389        ]))
390    }
391}
392
393impl Default for LookupCrateToolImpl {
394    fn default() -> Self {
395        Self::new(Arc::new(super::DocService::default()))
396    }
397}
398
/// Arguments accepted by the `lookup_item` tool, which looks up a specific
/// item inside a crate.
///
/// The `mcp_tool` attribute below carries the tool metadata (name, title,
/// description, hints, icons); the fields are the JSON parameters that
/// `execute` deserializes from the incoming request.
#[rust_mcp_sdk::macros::mcp_tool(
    name = "lookup_item",
    title = "查找 Crate 项目文档",
    description = "从 docs.rs 获取 Rust crate 中特定项目(函数、结构体、trait、模块等)的文档。适用于查找特定 API 的详细用法和签名。支持搜索路径如 serde::Serialize、std::collections::HashMap 等。",
    destructive_hint = false,
    idempotent_hint = true,
    open_world_hint = false,
    read_only_hint = true,
    execution(task_support = "optional"),
    icons = [
        (src = "https://docs.rs/favicon.ico", mime_type = "image/x-icon", sizes = ["32x32"], theme = "light"),
        (src = "https://docs.rs/favicon.ico", mime_type = "image/x-icon", sizes = ["32x32"], theme = "dark")
    ]
)]
#[derive(Debug, Clone, Deserialize, Serialize, rust_mcp_sdk::macros::JsonSchema)]
pub struct LookupItemTool {
    /// Name of the crate to search in (required).
    #[json_schema(
        title = "Crate 名称",
        description = "要查找的 Crate name,例如:serde、tokio、std"
    )]
    pub crate_name: String,

    /// Path of the item to look up (for example `std::collections::HashMap`);
    /// it is sent as the `search` query parameter of the docs.rs URL.
    #[json_schema(
        title = "项目路径",
        description = "要查找的项目路径,格式为 '模块::子模块::项目名'。例如:serde::Serialize、tokio::runtime::Runtime、std::collections::HashMap"
    )]
    pub item_path: String,

    /// Crate version to search in; when `None` the version segment is left
    /// out of the docs.rs URL.
    #[json_schema(
        title = "版本号",
        description = "指定 crate 版本号。不指定则使用最新版本"
    )]
    pub version: Option<String>,

    /// Requested output format; `execute` falls back to `markdown` when this
    /// is `None`.
    #[json_schema(
        title = "输出格式",
        description = "Documentation output format: markdown (default), text (plain text), html",
        default = "markdown"
    )]
    pub format: Option<String>,
}
445
/// Implementation of the `lookup_item` tool.
pub struct LookupItemToolImpl {
    /// Shared documentation service; supplies the HTTP client and the
    /// document cache this tool uses.
    service: Arc<DocService>,
}
450
451impl LookupItemToolImpl {
452    /// 创建新的查找项目工具实例
453    #[must_use]
454    pub fn new(service: Arc<DocService>) -> Self {
455        Self { service }
456    }
457
458    /// 获取项目文档
459    async fn fetch_item_docs(
460        &self,
461        crate_name: &str,
462        item_path: &str,
463        version: Option<&str>,
464    ) -> std::result::Result<String, CallToolError> {
465        // Try to get from cache
466        if let Some(cached) = self
467            .service
468            .doc_cache()
469            .get_item_docs(crate_name, item_path, version)
470            .await
471        {
472            return Ok(cached);
473        }
474
475        // 构建搜索 URL
476        let url = if let Some(ver) = version {
477            format!(
478                "https://docs.rs/{}/{}/?search={}",
479                crate_name,
480                ver,
481                urlencoding::encode(item_path)
482            )
483        } else {
484            format!(
485                "https://docs.rs/{}/?search={}",
486                crate_name,
487                urlencoding::encode(item_path)
488            )
489        };
490
491        // Send HTTP request (reusing DocService client)
492        let response = self
493            .service
494            .client()
495            .get(&url)
496            .send()
497            .await
498            .map_err(|e| CallToolError::from_message(format!("HTTP request failed: {e}")))?;
499
500        if !response.status().is_success() {
501            return Err(CallToolError::from_message(format!(
502                "获取项目文档失败: HTTP {} - {}",
503                response.status(),
504                response.text().await.unwrap_or_default()
505            )));
506        }
507
508        let html = response
509            .text()
510            .await
511            .map_err(|e| CallToolError::from_message(format!("读取响应失败: {e}")))?;
512
513        // 提取搜索结果
514        let docs = extract_search_results(&html, item_path);
515
516        // 缓存结果
517        self.service
518            .doc_cache()
519            .set_item_docs(crate_name, item_path, version, docs.clone())
520            .await;
521
522        Ok(docs)
523    }
524
525    /// 获取原始 HTML(用于 text 格式)
526    async fn fetch_raw_html_for_item(
527        &self,
528        crate_name: &str,
529        item_path: &str,
530        version: Option<&str>,
531    ) -> std::result::Result<String, CallToolError> {
532        // 构建搜索 URL
533        let url = if let Some(ver) = version {
534            format!(
535                "https://docs.rs/{}/{}/?search={}",
536                crate_name,
537                ver,
538                urlencoding::encode(item_path)
539            )
540        } else {
541            format!(
542                "https://docs.rs/{}/?search={}",
543                crate_name,
544                urlencoding::encode(item_path)
545            )
546        };
547
548        // Send HTTP request (reusing DocService client)
549        let response = self
550            .service
551            .client()
552            .get(&url)
553            .send()
554            .await
555            .map_err(|e| CallToolError::from_message(format!("HTTP request failed: {e}")))?;
556
557        if !response.status().is_success() {
558            return Err(CallToolError::from_message(format!(
559                "获取项目文档失败: HTTP {} - {}",
560                response.status(),
561                response.text().await.unwrap_or_default()
562            )));
563        }
564
565        let html = response
566            .text()
567            .await
568            .map_err(|e| CallToolError::from_message(format!("读取响应失败: {e}")))?;
569
570        Ok(html)
571    }
572}
573
574/// 从 HTML 中提取搜索结果
575fn extract_search_results(html: &str, item_path: &str) -> String {
576    // 先清理 HTML(移除 script, style, noscript 等标签及内容)
577    let cleaned_html = clean_html(html);
578    // 使用 html2md 库将清理后的 HTML 转换为 Markdown
579    let markdown = html2md::parse_html(&cleaned_html);
580
581    // 如果搜索结果为空,返回提示信息
582    if markdown.trim().is_empty() {
583        format!("未找到项目 '{item_path}' 的文档")
584    } else {
585        format!("## 搜索结果: {item_path}\n\n{markdown}")
586    }
587}
588
589#[async_trait]
590impl Tool for LookupItemToolImpl {
591    fn definition(&self) -> rust_mcp_sdk::schema::Tool {
592        LookupItemTool::tool()
593    }
594
595    async fn execute(
596        &self,
597        arguments: serde_json::Value,
598    ) -> std::result::Result<
599        rust_mcp_sdk::schema::CallToolResult,
600        rust_mcp_sdk::schema::CallToolError,
601    > {
602        let params: LookupItemTool = serde_json::from_value(arguments).map_err(|e| {
603            rust_mcp_sdk::schema::CallToolError::invalid_arguments(
604                "lookup_item",
605                Some(format!("参数解析失败: {e}")),
606            )
607        })?;
608
609        let format = params.format.unwrap_or_else(|| "markdown".to_string());
610        let content = match format.as_str() {
611            "text" => {
612                // 获取原始 HTML 并转换为纯文本
613                let html = self
614                    .fetch_raw_html_for_item(
615                        &params.crate_name,
616                        &params.item_path,
617                        params.version.as_deref(),
618                    )
619                    .await?;
620                format!("搜索结果: {}\n\n{}", params.item_path, html_to_text(&html))
621            }
622            _ => {
623                // "markdown" 和其他格式都返回原始文档
624                self.fetch_item_docs(
625                    &params.crate_name,
626                    &params.item_path,
627                    params.version.as_deref(),
628                )
629                .await?
630            }
631        };
632
633        Ok(rust_mcp_sdk::schema::CallToolResult::text_content(vec![
634            content.into(),
635        ]))
636    }
637}
638
639impl Default for LookupItemToolImpl {
640    fn default() -> Self {
641        Self::new(Arc::new(super::DocService::default()))
642    }
643}