Skip to main content

crates_docs/tools/docs/
search.rs

//! Search crates tool
//!
//! Provides functionality to search for Rust crates from crates.io.
//! Returns a list of matching crates with metadata like name, description,
//! version, downloads, etc.
6
7#![allow(missing_docs)]
8
9use crate::tools::Tool;
10use async_trait::async_trait;
11use rust_mcp_sdk::macros;
12use rust_mcp_sdk::schema::CallToolError;
13use serde::{Deserialize, Serialize};
14use std::sync::Arc;
15
/// Number of results requested when the caller does not supply `limit`.
const DEFAULT_SEARCH_LIMIT: u32 = 10;

/// Per-crate size estimate used to pre-size the plain-text output buffer.
const ESTIMATED_TEXT_ENTRY_SIZE: usize = 100;
/// Per-crate size estimate used to pre-size the markdown output buffer.
const ESTIMATED_MARKDOWN_ENTRY_SIZE: usize = 200;
19
20/// Search crates tool parameters
21///
22/// Used to specify search criteria for finding Rust crates on crates.io.
23#[macros::mcp_tool(
24    name = "search_crates",
25    title = "Search Crates",
26    description = "Search for Rust crates from crates.io. Returns a list of matching crates, including name, description, version, downloads, etc. Suitable for discovering and comparing available Rust libraries.",
27    destructive_hint = false,
28    idempotent_hint = true,
29    open_world_hint = false,
30    read_only_hint = true,
31    icons = [
32        (src = "https://crates.io/favicon.ico", mime_type = "image/x-icon", sizes = ["32x32"], theme = "light"),
33        (src = "https://crates.io/favicon.ico", mime_type = "image/x-icon", sizes = ["32x32"], theme = "dark")
34    ]
35)]
36/// Parameters for the `search_crates` tool
37///
38/// Defines the input parameters for searching Rust crates on crates.io,
39/// including the search query, result limit, sort order, and output format.
40#[derive(Debug, Clone, Deserialize, Serialize, macros::JsonSchema)]
41pub struct SearchCratesTool {
42    /// Search keywords (e.g., "web framework", "async", "http client")
43    #[json_schema(
44        title = "Search Query",
45        description = "Search keywords, e.g.: web framework, async, http client, serialization"
46    )]
47    pub query: String,
48
49    /// Maximum number of results to return (range 1-100, defaults to 10)
50    #[json_schema(
51        title = "Result Limit",
52        description = "Maximum number of results to return, range 1-100",
53        minimum = 1,
54        maximum = 100,
55        default = 10
56    )]
57    pub limit: Option<u32>,
58
59    /// Sort order: "relevance", "downloads", "recent-downloads", "recent-updates", "new"
60    #[json_schema(
61        title = "Sort Order",
62        description = "Sort order: relevance (default), downloads, recent-downloads, recent-updates, new",
63        default = "relevance"
64    )]
65    pub sort: Option<String>,
66
67    /// Output format: "markdown", "text", or "json" (defaults to "markdown")
68    #[json_schema(
69        title = "Output Format",
70        description = "Output format: markdown (default), text (plain text), json (structured JSON: name, version, downloads, recent_downloads, description, repository, documentation, docs_rs)",
71        default = "markdown"
72    )]
73    pub format: Option<String>,
74}
75
/// Sort order applied when the caller omits `sort`.
const DEFAULT_SEARCH_SORT: &str = "relevance";

/// Every sort order accepted by `normalize_search_sort`, default first.
/// The list is also echoed verbatim in the invalid-sort error message.
const VALID_SEARCH_SORTS: &[&str] = &[
    DEFAULT_SEARCH_SORT,
    // Remaining crates.io sort keys, in the order they are reported to users.
    "downloads",
    "recent-downloads",
    "recent-updates",
    "new",
];
84
85/// Crates.io search response (typed deserialization)
86#[derive(Debug, Deserialize)]
87struct SearchCratesResponse {
88    crates: Vec<SearchCrateRecord>,
89}
90
91/// Individual crate record from crates.io search
92#[derive(Debug, Deserialize)]
93struct SearchCrateRecord {
94    name: String,
95    #[serde(default)]
96    description: Option<String>,
97    #[serde(default = "default_max_version")]
98    max_version: String,
99    /// Highest non-yanked version (crates.io). Preferred over `max_version`
100    /// (which can be a yanked release users cannot install).
101    #[serde(default)]
102    max_stable_version: Option<String>,
103    #[serde(default)]
104    downloads: u64,
105    /// Downloads in the last 90 days (crates.io `recent_downloads`). Drives the
106    /// `recent-downloads` sort, so it is surfaced alongside the total.
107    #[serde(default)]
108    recent_downloads: Option<u64>,
109    #[serde(default)]
110    repository: Option<String>,
111    #[serde(default)]
112    documentation: Option<String>,
113}
114
/// Serde default for `SearchCrateRecord::max_version`: the placeholder
/// version `"0.0.0"` used when the search payload omits the key.
fn default_max_version() -> String {
    String::from("0.0.0")
}
118
119/// Implementation of the search crates tool
120///
121/// Handles the execution of crate searches on crates.io, including
122/// cache management, HTTP requests, and result formatting.
123pub struct SearchCratesToolImpl {
124    /// Shared document service for HTTP requests and caching
125    service: Arc<super::DocService>,
126}
127
128fn normalize_search_sort(sort: Option<&str>) -> std::result::Result<String, CallToolError> {
129    match sort {
130        Some(raw) => {
131            // Normalize like `parse_format`: trim surrounding whitespace and
132            // compare case-insensitively so e.g. "Downloads" or " downloads "
133            // are accepted. This also matches the cache-key normalization.
134            let normalized = raw.trim().to_lowercase();
135            if VALID_SEARCH_SORTS.contains(&normalized.as_str()) {
136                Ok(normalized)
137            } else {
138                Err(CallToolError::invalid_arguments(
139                    "search_crates",
140                    Some(format!(
141                        "Invalid sort option '{raw}', expected one of: {}",
142                        VALID_SEARCH_SORTS.join(", ")
143                    )),
144                ))
145            }
146        }
147        None => Ok(DEFAULT_SEARCH_SORT.to_string()),
148    }
149}
150
151impl SearchCratesToolImpl {
152    /// Create a new tool instance
153    #[must_use]
154    pub fn new(service: Arc<super::DocService>) -> Self {
155        Self { service }
156    }
157
158    /// Search crates
159    async fn search_crates(
160        &self,
161        query: &str,
162        limit: u32,
163        sort: &str,
164    ) -> std::result::Result<Vec<CrateInfo>, CallToolError> {
165        // Check cache using DocCache API
166        if let Some(cached) = self
167            .service
168            .doc_cache()
169            .get_search_results(query, limit, Some(sort))
170            .await
171        {
172            return serde_json::from_str(&cached).map_err(|e| {
173                CallToolError::from_message(format!("[search_crates] Cache parsing failed: {e}"))
174            });
175        }
176
177        // Build URL using helper function
178        let url = super::build_crates_io_search_url(query, Some(sort), Some(limit as usize));
179
180        let response = self
181            .service
182            .client()
183            .get(&url)
184            .header("User-Agent", crate::user_agent())
185            .send()
186            .await
187            .map_err(|e| {
188                CallToolError::from_message(format!("[search_crates] HTTP request failed: {e}"))
189            })?;
190
191        if !response.status().is_success() {
192            // Surface crates.io diagnostics (e.g. rate-limit explanations) from
193            // the response body instead of returning a bare status code. HTML
194            // error pages are suppressed to avoid dumping noise.
195            let status = response.status();
196            let body = response.text().await.unwrap_or_default();
197            let trimmed = body.trim();
198            let detail = if trimmed.is_empty()
199                || trimmed.starts_with('<')
200                || trimmed.to_ascii_lowercase().contains("<html")
201            {
202                String::new()
203            } else {
204                let snippet: String = trimmed.chars().take(200).collect();
205                format!(" - {snippet}")
206            };
207            return Err(CallToolError::from_message(format!(
208                "[search_crates] crates.io search failed: HTTP {status}{detail}"
209            )));
210        }
211
212        // Use typed deserialization instead of serde_json::Value
213        let search_response: SearchCratesResponse = response.json().await.map_err(|e| {
214            CallToolError::from_message(format!("[search_crates] JSON parsing failed: {e}"))
215        })?;
216
217        let crates = parse_crates_response(search_response, limit as usize);
218
219        let cache_value = serde_json::to_string(&crates).map_err(|e| {
220            CallToolError::from_message(format!("[search_crates] Serialization failed: {e}"))
221        })?;
222
223        // Cache the results. A cache write failure (e.g. a Redis outage) must
224        // not fail the user's request: the search succeeded, so log and
225        // continue returning the results uncached.
226        if let Err(e) = self
227            .service
228            .doc_cache()
229            .set_search_results(query, limit, Some(sort), cache_value)
230            .await
231        {
232            tracing::warn!(
233                "[search_crates] failed to cache search results (continuing uncached): {e}"
234            );
235        }
236
237        Ok(crates)
238    }
239}
240
241/// Crate information from search results
242#[derive(Debug, Clone, Serialize, Deserialize)]
243struct CrateInfo {
244    /// Crate name
245    name: String,
246    /// Crate description
247    description: Option<String>,
248    /// Latest version
249    version: String,
250    /// Total downloads
251    downloads: u64,
252    /// Recent downloads (last 90 days), when reported by crates.io. Shown next
253    /// to the total so `recent-downloads`-sorted results are not confusing.
254    #[serde(default)]
255    recent_downloads: Option<u64>,
256    /// Repository URL
257    repository: Option<String>,
258    /// Documentation URL (as provided by crates.io, if any)
259    documentation: Option<String>,
260    /// Canonical docs.rs URL for the crate (always present on fresh results).
261    /// Tolerate cache entries written by older binaries that predate this
262    /// field so a stale cache hit degrades to an empty value instead of a
263    /// fatal "Cache parsing failed" error.
264    #[serde(default)]
265    docs_rs: String,
266}
267
268#[inline]
269fn parse_crates_response(response: SearchCratesResponse, limit: usize) -> Vec<CrateInfo> {
270    response
271        .crates
272        .into_iter()
273        .take(limit)
274        .map(|crate_record| {
275            let docs_rs = format!("https://docs.rs/{}/", crate_record.name);
276            CrateInfo {
277                name: crate_record.name,
278                description: crate_record.description,
279                // Prefer the highest stable (non-yanked) version so results do
280                // not advertise a version users cannot `cargo add`. Fall back to
281                // max_version when a crate has no stable release.
282                version: crate_record
283                    .max_stable_version
284                    .unwrap_or(crate_record.max_version),
285                downloads: crate_record.downloads,
286                recent_downloads: crate_record.recent_downloads,
287                repository: crate_record.repository,
288                documentation: crate_record.documentation,
289                docs_rs,
290            }
291        })
292        .collect()
293}
294
295#[inline]
296fn format_search_results(crates: &[CrateInfo], format: super::Format) -> String {
297    match format {
298        // Machine-readable: an empty array is the correct, parseable result for
299        // a no-match search, so it is left as-is.
300        super::Format::Json => {
301            serde_json::to_string_pretty(crates).unwrap_or_else(|_| "[]".to_string())
302        }
303        // Human-readable formats must not return a blank (text) or header-only
304        // (markdown) body when nothing matched: that looks like a failure. Emit
305        // an explicit "no crates found" message instead.
306        super::Format::Text => {
307            if crates.is_empty() {
308                "No crates found matching the query.".to_string()
309            } else {
310                format_text_results(crates)
311            }
312        }
313        // `html` is rejected before formatting (see `execute`); list both
314        // variants explicitly so adding a new `Format` variant becomes a
315        // compile error here rather than a silent fall-through to markdown.
316        super::Format::Markdown | super::Format::Html => {
317            if crates.is_empty() {
318                "# Search Results\n\nNo crates found matching the query.".to_string()
319            } else {
320                format_markdown_results(crates)
321            }
322        }
323    }
324}
325
/// Flatten a description onto a single line.
///
/// Every run of whitespace (spaces, tabs, newlines) becomes one space and
/// leading/trailing whitespace is dropped, so a description that ends with a
/// newline does not push a blank line between `Description` and the field
/// that follows it.
fn normalize_description(s: &str) -> String {
    let mut flattened = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Separator goes before every word except the first.
        if !flattened.is_empty() {
            flattened.push(' ');
        }
        flattened.push_str(word);
    }
    flattened
}
334
/// Neutralize markdown-structural characters in upstream-controlled text
/// (such as a publisher-written crate description).
///
/// Backslash, `[`, `]` and the backtick are prefixed with a backslash, and `<`
/// becomes `&lt;`, so the text cannot open links, inline HTML, or code spans.
/// All other characters pass through unchanged so ordinary prose renders
/// as written.
fn escape_markdown_text(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            match ch {
                // Backslash-escape: the character itself follows the escape.
                '\\' | '[' | ']' | '`' => {
                    escaped.push('\\');
                    escaped.push(ch);
                }
                '<' => escaped.push_str("&lt;"),
                other => escaped.push(other),
            }
            escaped
        })
}
353
/// Render a publisher-supplied URL for markdown output.
///
/// The URL becomes `[label](url)` only when it starts with `http://` or
/// `https://` and contains no whitespace, control characters, or any of
/// `( ) < > [ ] " \`. Anything else is emitted as an inert code span (with
/// backticks and control characters replaced by spaces), so a crafted
/// `repository`/`documentation` value cannot produce an active or misleading
/// link, including non-`http` schemes such as `javascript:`.
fn render_markdown_url(label: &str, url: &str) -> String {
    let has_http_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| url.starts_with(*scheme));
    let only_safe_chars = url.chars().all(|ch| {
        !ch.is_whitespace()
            && !ch.is_control()
            && !matches!(ch, '(' | ')' | '<' | '>' | '[' | ']' | '"' | '\\')
    });

    if !(has_http_scheme && only_safe_chars) {
        // Unsafe target: wrap it in backticks so it is neither clickable nor
        // able to inject further markdown.
        let mut inert = String::with_capacity(url.len() + 2);
        inert.push('`');
        inert.extend(
            url.chars()
                .map(|ch| if ch == '`' || ch.is_control() { ' ' } else { ch }),
        );
        inert.push('`');
        return inert;
    }

    format!("[{label}]({url})")
}
378
379fn format_markdown_results(crates: &[CrateInfo]) -> String {
380    // SAFETY: writeln! to String never fails (writes to memory buffer). unwrap() is safe here.
381    use std::fmt::Write;
382    let estimated_size = crates.len().saturating_mul(ESTIMATED_MARKDOWN_ENTRY_SIZE) + 20;
383    let mut output = String::with_capacity(estimated_size);
384    output.push_str("# Search Results\n\n");
385
386    for (i, crate_info) in crates.iter().enumerate() {
387        writeln!(output, "## {}. {}", i + 1, crate_info.name).unwrap();
388        writeln!(output, "**Version**: {}", crate_info.version).unwrap();
389        writeln!(output, "**Downloads**: {}", crate_info.downloads).unwrap();
390        if let Some(recent) = crate_info.recent_downloads {
391            writeln!(output, "**Recent downloads**: {recent}").unwrap();
392        }
393
394        if let Some(desc) = &crate_info.description {
395            writeln!(
396                output,
397                "**Description**: {}",
398                escape_markdown_text(&normalize_description(desc))
399            )
400            .unwrap();
401        }
402
403        if let Some(repo) = &crate_info.repository {
404            writeln!(
405                output,
406                "**Repository**: {}",
407                render_markdown_url("Link", repo)
408            )
409            .unwrap();
410        }
411
412        if let Some(docs) = &crate_info.documentation {
413            writeln!(
414                output,
415                "**Documentation**: {}",
416                render_markdown_url("Link", docs)
417            )
418            .unwrap();
419        }
420
421        writeln!(
422            output,
423            "**Docs.rs**: {}\n",
424            render_markdown_url(&crate_info.docs_rs, &crate_info.docs_rs)
425        )
426        .unwrap();
427    }
428
429    output
430}
431
432fn format_text_results(crates: &[CrateInfo]) -> String {
433    // SAFETY: writeln! to String never fails (writes to memory buffer). unwrap() is safe here.
434    use std::fmt::Write;
435    let estimated_size = crates.len().saturating_mul(ESTIMATED_TEXT_ENTRY_SIZE);
436    let mut output = String::with_capacity(estimated_size);
437
438    for (i, crate_info) in crates.iter().enumerate() {
439        writeln!(output, "{}. {}", i + 1, crate_info.name).unwrap();
440        writeln!(output, "   Version: {}", crate_info.version).unwrap();
441        writeln!(output, "   Downloads: {}", crate_info.downloads).unwrap();
442        if let Some(recent) = crate_info.recent_downloads {
443            writeln!(output, "   Recent downloads: {recent}").unwrap();
444        }
445
446        if let Some(desc) = &crate_info.description {
447            writeln!(output, "   Description: {}", normalize_description(desc)).unwrap();
448        }
449
450        // Mirror the markdown format so the text format does not silently drop
451        // the repository/documentation links when crates.io provides them.
452        if let Some(repo) = &crate_info.repository {
453            writeln!(output, "   Repository: {repo}").unwrap();
454        }
455
456        if let Some(docs) = &crate_info.documentation {
457            writeln!(output, "   Documentation: {docs}").unwrap();
458        }
459
460        writeln!(output, "   Docs.rs: {}", crate_info.docs_rs).unwrap();
461        writeln!(output).unwrap();
462    }
463
464    output
465}
466
467#[async_trait]
468impl Tool for SearchCratesToolImpl {
469    fn definition(&self) -> rust_mcp_sdk::schema::Tool {
470        SearchCratesTool::tool()
471    }
472
473    async fn execute(
474        &self,
475        arguments: serde_json::Value,
476    ) -> std::result::Result<
477        rust_mcp_sdk::schema::CallToolResult,
478        rust_mcp_sdk::schema::CallToolError,
479    > {
480        let params: SearchCratesTool = serde_json::from_value(arguments).map_err(|e| {
481            rust_mcp_sdk::schema::CallToolError::invalid_arguments(
482                "search_crates",
483                Some(format!("Parameter parsing failed: {e}")),
484            )
485        })?;
486
487        // Validate all input parameters up front (fail-fast) before making any
488        // network requests. This avoids wasted crates.io calls on invalid input
489        // and keeps input-validation errors deterministic regardless of network
490        // availability.
491        super::validate_search_query("search_crates", &params.query)?;
492        // Clamp to the documented range [1, 100]. A lower bound of 0 (or a
493        // value above 100) would otherwise silently produce an empty/odd
494        // result set and a `per_page=0` upstream request.
495        let limit = params.limit.unwrap_or(DEFAULT_SEARCH_LIMIT).clamp(1, 100);
496        let sort = normalize_search_sort(params.sort.as_deref())?;
497        // `parse_format` validates against SEARCH_FORMATS, so an unsupported
498        // (e.g. `html`) or unknown format is rejected here with an error that
499        // lists only the formats search actually accepts.
500        let format = super::parse_format(
501            "search_crates",
502            params.format.as_deref(),
503            super::SEARCH_FORMATS,
504        )?;
505
506        // Trim the query before fetching so the upstream crates.io request
507        // matches the normalized (trimmed + lowercased) cache key. Otherwise a
508        // query like "  tokio  " is sent verbatim to crates.io (poorer results)
509        // yet cached/looked-up under the trimmed key, letting a whitespace-laden
510        // first request poison the cache for every later "tokio" caller.
511        let crates = self
512            .search_crates(params.query.trim(), limit, &sort)
513            .await?;
514        let content = format_search_results(&crates, format);
515
516        Ok(rust_mcp_sdk::schema::CallToolResult::text_content(vec![
517            content.into(),
518        ]))
519    }
520}
521
522impl Default for SearchCratesToolImpl {
523    fn default() -> Self {
524        Self::new(Arc::new(super::DocService::default()))
525    }
526}
527
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::docs::Format;

    /// Deserialize a crates.io-style search payload and convert it with a
    /// limit of 10.
    fn crates_from_json(payload: &str) -> Vec<CrateInfo> {
        let resp: SearchCratesResponse = serde_json::from_str(payload).unwrap();
        parse_crates_response(resp, 10)
    }

    /// Human-readable formats explain an empty result; JSON stays an array.
    #[test]
    fn test_format_search_results_empty_emits_message() {
        let no_crates: &[CrateInfo] = &[];

        let text = format_search_results(no_crates, Format::Text);
        assert!(
            text.contains("No crates found"),
            "text empty should explain no matches: {text:?}"
        );

        let md = format_search_results(no_crates, Format::Markdown);
        assert!(
            md.contains("No crates found"),
            "markdown empty should explain no matches: {md:?}"
        );

        // JSON stays machine-parseable: an empty array, not a prose message.
        let json = format_search_results(no_crates, Format::Json);
        assert_eq!(json, "[]");
    }

    /// `recent_downloads` survives parsing and shows up in both renderers.
    #[test]
    fn test_recent_downloads_parsed_and_rendered() {
        let crates = crates_from_json(
            r#"{"crates":[
            {"name":"a","max_stable_version":"1.0.0","downloads":1000,"recent_downloads":42}
        ]}"#,
        );
        assert_eq!(crates[0].recent_downloads, Some(42));

        let md = format_search_results(&crates, Format::Markdown);
        assert!(md.contains("**Recent downloads**: 42"), "markdown: {md}");

        let text = format_search_results(&crates, Format::Text);
        assert!(text.contains("Recent downloads: 42"), "text: {text}");
    }

    /// When both versions are reported the stable one must win; without a
    /// stable version the result falls back to `max_version`.
    #[test]
    fn test_parse_crates_response_prefers_stable_version() {
        let crates = crates_from_json(
            r#"{"crates":[
            {"name":"a","max_version":"2.0.0-yanked","max_stable_version":"1.9.0","downloads":1},
            {"name":"b","max_version":"0.3.0","downloads":2}
        ]}"#,
        );
        assert_eq!(crates[0].version, "1.9.0");
        // No max_stable_version -> fall back to max_version.
        assert_eq!(crates[1].version, "0.3.0");
    }

    /// The text format lists repository, documentation and docs.rs URLs.
    #[test]
    fn test_format_text_results_includes_repository_and_documentation() {
        let demo = CrateInfo {
            name: "demo".to_string(),
            description: Some("A demo crate".to_string()),
            version: "1.0.0".to_string(),
            downloads: 42,
            recent_downloads: None,
            repository: Some("https://github.com/x/demo".to_string()),
            documentation: Some("https://docs.rs/demo".to_string()),
            docs_rs: "https://docs.rs/demo/".to_string(),
        };

        let out = format_text_results(&[demo]);
        assert!(
            out.contains("Repository: https://github.com/x/demo"),
            "{out}"
        );
        assert!(out.contains("Documentation: https://docs.rs/demo"), "{out}");
        assert!(out.contains("Docs.rs: https://docs.rs/demo/"), "{out}");
    }

    /// A description ending in a newline must not insert a blank line between
    /// `Description` and the next field in either human-readable format.
    #[test]
    fn test_description_trailing_newline_does_not_split_record() {
        let crates = [CrateInfo {
            name: "futures-executor".to_string(),
            description: Some("Runtime for the async/await macros.\n".to_string()),
            version: "0.3.0".to_string(),
            downloads: 1,
            recent_downloads: None,
            repository: Some("https://github.com/rust-lang/futures-rs".to_string()),
            documentation: None,
            docs_rs: "https://docs.rs/futures-executor/".to_string(),
        }];

        let text = format_text_results(&crates);
        assert!(
            text.contains("Description: Runtime for the async/await macros.\n   Repository:"),
            "text record split by stray blank line: {text:?}"
        );

        let md = format_markdown_results(&crates);
        assert!(
            !md.contains("macros.\n\n**Repository"),
            "markdown record split by stray blank line: {md:?}"
        );
    }
}
629}