Skip to main content

crates_docs/tools/docs/
mod.rs

1//! Document lookup tool module
2//!
3//! Provides tools and services for querying Rust crate documentation.
4//!
5//! # Submodules
6//!
7//! - `cache`: Document cache
8//! - `html`: HTML processing
9//! - `lookup_crate`: Crate documentation lookup
10//! - `lookup_item`: Item documentation lookup
11//! - `search`: Crate search
12//!
13//! # Examples
14//!
15//! ```rust,no_run
16//! use std::sync::Arc;
17//! use crates_docs::tools::docs::DocService;
18//! use crates_docs::cache::memory::MemoryCache;
19//!
20//! let cache = Arc::new(MemoryCache::new(1000));
21//! let service = DocService::new(cache).expect("Failed to create DocService");
22//! ```
23
24pub mod cache;
25pub mod html;
26pub mod lookup_crate;
27pub mod lookup_item;
28pub mod search;
29
30use crate::cache::{Cache, CacheConfig};
31use crate::config::PerformanceConfig;
32use rust_mcp_sdk::schema::CallToolError;
33use std::sync::Arc;
34
/// Output formats a tool can render its result in.
///
/// [`Format::Markdown`] is the [`Default`] variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Format {
    /// Markdown (the default).
    #[default]
    Markdown,
    /// Plain text.
    Text,
    /// HTML.
    Html,
    /// JSON (used by the search tool).
    Json,
}

/// Writes the lowercase name of the format: `markdown`, `text`, `html` or
/// `json`.
impl std::fmt::Display for Format {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Self::Markdown => "markdown",
            Self::Text => "text",
            Self::Html => "html",
            Self::Json => "json",
        };
        f.write_str(name)
    }
}
59
/// Formats supported by the documentation lookup tools (`lookup_crate`,
/// `lookup_item`). JSON is intentionally excluded: these tools render prose
/// documentation, not structured data.
///
/// The list contains [`Format::Markdown`], the value `parse_format` returns
/// when no format string is supplied.
pub const DOC_FORMATS: &[Format] = &[Format::Markdown, Format::Text, Format::Html];

/// Formats supported by the `search_crates` tool. HTML is intentionally
/// excluded: search results are structured records, not an HTML document.
///
/// The list contains [`Format::Markdown`], the value `parse_format` returns
/// when no format string is supplied.
pub const SEARCH_FORMATS: &[Format] = &[Format::Markdown, Format::Text, Format::Json];
68
69/// Parse and validate a format string against the formats a tool supports.
70///
71/// `allowed` lists the formats the calling tool actually accepts. Both an
72/// unrecognized string and a recognized format outside `allowed` produce an
73/// error that lists only the supported formats, so a caller is never advised to
74/// retry with a format the tool will then reject. `None` defaults to markdown,
75/// which every tool supports.
76pub fn parse_format(
77    tool_name: &str,
78    format_str: Option<&str>,
79    allowed: &[Format],
80) -> Result<Format, CallToolError> {
81    let Some(s) = format_str else {
82        return Ok(Format::Markdown);
83    };
84    let parsed = match s.trim().to_lowercase().as_str() {
85        "markdown" => Some(Format::Markdown),
86        "text" => Some(Format::Text),
87        "html" => Some(Format::Html),
88        "json" => Some(Format::Json),
89        _ => None,
90    };
91    match parsed {
92        Some(format) if allowed.contains(&format) => Ok(format),
93        _ => {
94            let supported = allowed
95                .iter()
96                .map(ToString::to_string)
97                .collect::<Vec<_>>()
98                .join(", ");
99            Err(CallToolError::invalid_arguments(
100                tool_name,
101                Some(format!(
102                    "Invalid format '{s}'. This tool supports: {supported}"
103                )),
104            ))
105        }
106    }
107}
108
109/// Validate a crate name supplied by a tool caller.
110///
111/// Crate names on crates.io are restricted to ASCII alphanumerics plus `_` and
112/// `-`. Rejecting anything else early provides a clear error and prevents
113/// malformed values (path separators, `..`, whitespace, control characters)
114/// from being interpolated into docs.rs URLs.
115///
116/// # Errors
117///
118/// Returns a `CallToolError` describing the first problem found.
119pub fn validate_crate_name(tool_name: &str, crate_name: &str) -> Result<(), CallToolError> {
120    let name = crate_name.trim();
121    if name.is_empty() {
122        return Err(CallToolError::invalid_arguments(
123            tool_name,
124            Some("crate_name must not be empty".to_string()),
125        ));
126    }
127    if name.len() > 64 {
128        return Err(CallToolError::invalid_arguments(
129            tool_name,
130            Some("crate_name is too long (max 64 characters)".to_string()),
131        ));
132    }
133    if !name
134        .bytes()
135        .all(|b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
136    {
137        return Err(CallToolError::invalid_arguments(
138            tool_name,
139            Some(format!(
140                "Invalid crate_name '{crate_name}'. Only ASCII letters, digits, '_' and '-' are allowed"
141            )),
142        ));
143    }
144    Ok(())
145}
146
/// Canonicalize a user-supplied version string before it is placed in a URL.
///
/// Surrounding whitespace is removed, and one leading `v` or `V` is dropped
/// when the very next character is an ASCII digit (`v1.2.3` becomes `1.2.3`).
/// Versions copied from git tags or changelogs commonly carry that prefix,
/// while crates.io and docs.rs versions do not. Identifiers where the `v` is
/// not followed by a digit (`latest`, `vendored`, a lone `v`) are returned
/// trimmed but otherwise unchanged.
#[must_use]
pub fn normalize_version(version: &str) -> String {
    let trimmed = version.trim();

    // Candidate remainder after a single `v`/`V`, kept only if it starts with
    // an ASCII digit.
    let without_prefix = trimmed
        .strip_prefix('v')
        .or_else(|| trimmed.strip_prefix('V'))
        .filter(|rest| rest.as_bytes().first().map_or(false, u8::is_ascii_digit));

    without_prefix.unwrap_or(trimmed).to_string()
}
165
166/// Validate an optional version string supplied by a tool caller.
167///
168/// Accepts concrete versions and identifiers such as `latest` while rejecting
169/// path-traversal sequences and characters that could escape the docs.rs path.
170///
171/// # Errors
172///
173/// Returns a `CallToolError` describing the first problem found.
174pub fn validate_version(tool_name: &str, version: Option<&str>) -> Result<(), CallToolError> {
175    let Some(raw) = version else {
176        return Ok(());
177    };
178    let ver = raw.trim();
179    if ver.is_empty() {
180        return Err(CallToolError::invalid_arguments(
181            tool_name,
182            Some("version must not be empty when provided".to_string()),
183        ));
184    }
185    if ver.len() > 64 {
186        return Err(CallToolError::invalid_arguments(
187            tool_name,
188            Some("version is too long (max 64 characters)".to_string()),
189        ));
190    }
191    if ver.contains("..")
192        || !ver
193            .bytes()
194            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'-' | b'+' | b'_' | b'~'))
195    {
196        return Err(CallToolError::invalid_arguments(
197            tool_name,
198            Some(format!(
199                "Invalid version '{raw}'. Only ASCII letters, digits and '.', '-', '+', '_', '~' are allowed"
200            )),
201        ));
202    }
203    Ok(())
204}
205
206/// Validate a search query supplied by a tool caller.
207///
208/// Rejects empty/whitespace-only queries (which would otherwise trigger an
209/// unfiltered crates.io request returning arbitrary crates) and overly long
210/// queries that cannot represent a meaningful search.
211///
212/// # Errors
213///
214/// Returns a `CallToolError` describing the first problem found.
215pub fn validate_search_query(tool_name: &str, query: &str) -> Result<(), CallToolError> {
216    let trimmed = query.trim();
217    if trimmed.is_empty() {
218        return Err(CallToolError::invalid_arguments(
219            tool_name,
220            Some("query must not be empty".to_string()),
221        ));
222    }
223    if trimmed.len() > 200 {
224        return Err(CallToolError::invalid_arguments(
225            tool_name,
226            Some("query is too long (max 200 characters)".to_string()),
227        ));
228    }
229    Ok(())
230}
231
232/// Validate an item path supplied by a tool caller.
233///
234/// Item paths are Rust paths made of identifier segments separated by `::`
235/// (for example `serde::Serialize` or `std::vec::Vec::push`). This rejects
236/// path-traversal sequences and characters such as `/`, `.` or whitespace that
237/// could escape the docs.rs path or otherwise form an invalid request, giving
238/// callers an actionable error instead of an opaque HTTP 400.
239///
240/// # Errors
241///
242/// Returns a `CallToolError` describing the first problem found.
243pub fn validate_item_path(tool_name: &str, item_path: &str) -> Result<(), CallToolError> {
244    let path = item_path.trim();
245    if path.is_empty() {
246        return Err(CallToolError::invalid_arguments(
247            tool_name,
248            Some("item_path must not be empty".to_string()),
249        ));
250    }
251    if path.len() > 256 {
252        return Err(CallToolError::invalid_arguments(
253            tool_name,
254            Some("item_path is too long (max 256 characters)".to_string()),
255        ));
256    }
257    if path.contains("..")
258        || !path
259            .bytes()
260            .all(|b| b.is_ascii_alphanumeric() || b == b'_' || b == b':')
261    {
262        return Err(CallToolError::invalid_arguments(
263            tool_name,
264            Some(format!(
265                "Invalid item_path '{item_path}'. Only ASCII letters, digits, '_' and '::' separators are allowed"
266            )),
267        ));
268    }
269    // Rust paths use '::' as the separator; a lone ':' (e.g. `serde:Serialize`)
270    // or an empty segment (e.g. `serde::`) would otherwise pass the byte check
271    // above and then silently fall back to the crate overview after a 404.
272    if path
273        .split("::")
274        .any(|segment| segment.is_empty() || segment.contains(':'))
275    {
276        return Err(CallToolError::invalid_arguments(
277            tool_name,
278            Some(format!(
279                "Invalid item_path '{item_path}'. Path segments must be separated by '::'"
280            )),
281        ));
282    }
283    Ok(())
284}
285
286/// Summarize a non-success HTTP response from docs.rs into a concise,
287/// actionable error string.
288///
289/// docs.rs returns a full HTML error page (often several KB) for failures such
290/// as 404. Dumping that entire page into the tool error is noisy and unhelpful,
291/// so this collapses it to the status plus a short hint. HTML bodies are never
292/// echoed back; only short plain-text bodies are included as a snippet.
293fn summarize_http_status(status: reqwest::StatusCode, body: &str) -> String {
294    if status == reqwest::StatusCode::NOT_FOUND {
295        return "HTTP 404 Not Found - the requested crate, version, or item does not exist on docs.rs. Verify the crate name, version, and item path.".to_string();
296    }
297
298    let trimmed = body.trim();
299    let lower = trimmed.to_ascii_lowercase();
300    let looks_like_html =
301        trimmed.starts_with('<') || lower.contains("<!doctype") || lower.contains("<html");
302    if trimmed.is_empty() || looks_like_html {
303        format!("HTTP {status}")
304    } else {
305        let snippet: String = trimmed.chars().take(200).collect();
306        format!("HTTP {status} - {snippet}")
307    }
308}
309
/// Base URL of docs.rs returned by the non-test build of `docs_rs_base_url`.
/// Compiled out of test builds, which read the base URL from the environment
/// instead.
#[cfg(not(test))]
const DOCS_RS_BASE_URL: &str = "https://docs.rs";

/// Base URL of crates.io returned by the non-test build of
/// `crates_io_base_url`. Compiled out of test builds, which read the base URL
/// from the environment instead.
#[cfg(not(test))]
const CRATES_IO_BASE_URL: &str = "https://crates.io";
315
/// Get the docs.rs base URL (test builds only).
///
/// Returns the value of the `CRATES_DOCS_DOCS_RS_URL` environment variable
/// when it can be read, so tests can point requests at another server, and
/// `https://docs.rs` otherwise.
#[cfg(test)]
#[must_use]
pub fn docs_rs_base_url() -> String {
    match std::env::var("CRATES_DOCS_DOCS_RS_URL") {
        Ok(overridden) => overridden,
        Err(_) => String::from("https://docs.rs"),
    }
}
322
323#[must_use]
324#[cfg(not(test))]
325/// Get the docs.rs base URL
326pub fn docs_rs_base_url() -> String {
327    DOCS_RS_BASE_URL.to_string()
328}
329
/// Get the crates.io base URL (test builds only).
///
/// Returns the value of the `CRATES_DOCS_CRATES_IO_URL` environment variable
/// when it can be read, so tests can point requests at another server, and
/// `https://crates.io` otherwise.
#[cfg(test)]
#[must_use]
pub fn crates_io_base_url() -> String {
    match std::env::var("CRATES_DOCS_CRATES_IO_URL") {
        Ok(overridden) => overridden,
        Err(_) => String::from("https://crates.io"),
    }
}
336
337#[must_use]
338#[cfg(not(test))]
339/// Get the crates.io base URL
340pub fn crates_io_base_url() -> String {
341    CRATES_IO_BASE_URL.to_string()
342}
/// Report whether `crate_name` is one of the Rust distribution crates whose
/// documentation lives on doc.rust-lang.org.
///
/// `std`, `core`, `alloc`, `proc_macro` (also accepted as `proc-macro`) and
/// `test` are not published to docs.rs. Their rustdoc uses the same item-page
/// layout but is hosted on doc.rust-lang.org without a mandatory version path
/// segment, so item and `all.html` URLs for them must target that host.
///
/// The comparison is exact: no trimming or case folding is applied.
#[must_use]
pub fn is_rust_std_crate(crate_name: &str) -> bool {
    const STD_FAMILY: [&str; 6] = ["std", "core", "alloc", "proc_macro", "proc-macro", "test"];
    STD_FAMILY.contains(&crate_name)
}
357
/// Base URL for Rust std-family crate docs on doc.rust-lang.org, honoring an
/// explicit version.
///
/// `doc.rust-lang.org` serves versioned docs at `/{version}/{krate}/` (e.g.
/// `https://doc.rust-lang.org/1.75.0/std/`, and channels `stable`/`beta`/
/// `nightly`). `None`, a blank version, or `"latest"` use the unversioned
/// current docs. Surrounding whitespace in `version` is ignored for all of
/// these decisions. The returned base always ends in `/`.
fn rust_lang_docs_base(krate: &str, version: Option<&str>) -> String {
    // Trim once so the emptiness test, the `latest` comparison and the emitted
    // path segment all see the same value. Comparing the untrimmed input
    // against "latest" would send " latest " to a non-existent `/latest/` path.
    match version.map(str::trim) {
        Some(ver) if !ver.is_empty() && ver != "latest" => {
            format!("https://doc.rust-lang.org/{ver}/{krate}/")
        }
        _ => format!("https://doc.rust-lang.org/{krate}/"),
    }
}
373
374/// Build docs.rs URL for crate documentation
375#[must_use]
376pub fn build_docs_url(crate_name: &str, version: Option<&str>) -> String {
377    if is_rust_std_crate(crate_name) {
378        let krate = crate_name.replace('-', "_");
379        return rust_lang_docs_base(&krate, version);
380    }
381    let base_url = docs_rs_base_url();
382    match version {
383        Some(ver) => format!("{base_url}/{crate_name}/{ver}/"),
384        None => format!("{base_url}/{crate_name}/"),
385    }
386}
387
388/// Build docs.rs search URL for item lookup
389#[must_use]
390pub fn build_docs_item_url(crate_name: &str, version: Option<&str>, item_path: &str) -> String {
391    let encoded_path = urlencoding::encode(item_path);
392    if is_rust_std_crate(crate_name) {
393        // std/core/alloc/etc. are not published to docs.rs; their docs live on
394        // doc.rust-lang.org. Mirror the other URL builders so the last-resort
395        // fallback degrades to the crate overview instead of a hard 404.
396        let krate = crate_name.replace('-', "_");
397        let base = rust_lang_docs_base(&krate, version);
398        return format!("{base}?search={encoded_path}");
399    }
400    let base_url = docs_rs_base_url();
401    match version {
402        Some(ver) => format!("{base_url}/{crate_name}/{ver}/?search={encoded_path}"),
403        None => format!("{base_url}/{crate_name}/?search={encoded_path}"),
404    }
405}
406
407/// Build candidate docs.rs URLs for a specific item, in priority order.
408///
409/// rustdoc item pages use predictable `{kind}.{name}.html` file names, but the
410/// item kind (struct/trait/fn/...) cannot be derived from the path alone. This
411/// returns the plausible candidate URLs to probe; the caller fetches each in
412/// order and uses the first that exists (HTTP 200). A trailing module candidate
413/// (`{name}/index.html`) covers items that are themselves modules.
414///
415/// The crate's library path component uses the underscore form (docs.rs maps
416/// `-` to `_` for module paths). A leading path segment equal to the crate name
417/// is dropped so both `Serialize` and `serde::Serialize` resolve correctly.
418#[must_use]
419pub fn build_docs_item_url_candidates(
420    crate_name: &str,
421    version: Option<&str>,
422    item_path: &str,
423) -> Vec<String> {
424    let krate = crate_name.replace('-', "_");
425
426    let segments: Vec<&str> = item_path
427        .split("::")
428        .map(str::trim)
429        .filter(|s| !s.is_empty())
430        .collect();
431    let Some((item, mods)) = segments.split_last() else {
432        return Vec::new();
433    };
434
435    // Drop a redundant leading crate-name segment (e.g. `serde::Serialize`).
436    let mods: &[&str] = if mods.first().map(|m| m.replace('-', "_")) == Some(krate.clone()) {
437        &mods[1..]
438    } else {
439        mods
440    };
441
442    let mut prefix = if is_rust_std_crate(crate_name) {
443        rust_lang_docs_base(&krate, version)
444    } else {
445        let base_url = docs_rs_base_url();
446        let ver = version.unwrap_or("latest");
447        format!("{base_url}/{crate_name}/{ver}/{krate}/")
448    };
449    for m in mods {
450        prefix.push_str(m);
451        prefix.push('/');
452    }
453
454    // Ordered roughly by how common each item kind is.
455    let kinds = [
456        "struct",
457        "trait",
458        "enum",
459        "fn",
460        "type",
461        "macro",
462        "attr",
463        "constant",
464        "derive",
465        "union",
466        "primitive",
467    ];
468    let mut candidates: Vec<String> = kinds
469        .iter()
470        .map(|k| format!("{prefix}{k}.{item}.html"))
471        .collect();
472    // The item itself may be a module.
473    candidates.push(format!("{prefix}{item}/index.html"));
474    candidates
475}
476
477/// Build the docs.rs `all.html` index URL for a crate.
478///
479/// rustdoc emits an `all.html` page listing every item in the crate (including
480/// re-exports) with hrefs relative to the crate root module. It is used to
481/// resolve items that have no stub page at the path implied by their name.
482#[must_use]
483pub fn build_docs_all_items_url(crate_name: &str, version: Option<&str>) -> String {
484    let krate = crate_name.replace('-', "_");
485    if is_rust_std_crate(crate_name) {
486        let base = rust_lang_docs_base(&krate, version);
487        return format!("{base}all.html");
488    }
489    let base_url = docs_rs_base_url();
490    let ver = version.unwrap_or("latest");
491    format!("{base_url}/{crate_name}/{ver}/{krate}/all.html")
492}
493
494/// Resolve an item page URL from a crate's `all.html` index by item name.
495///
496/// Returns the absolute docs.rs URL of the first item whose rustdoc file name is
497/// `{kind}.{item_name}.html` (for any item kind). This resolves re-exported
498/// items such as `tokio::spawn` (actually defined at `tokio::task::spawn`),
499/// which have no stub page at the crate root. Returns `None` if no match is
500/// found or the name is empty.
501#[must_use]
502pub fn find_item_url_in_all_html(
503    crate_name: &str,
504    version: Option<&str>,
505    all_html: &str,
506    item_name: &str,
507) -> Option<String> {
508    let item_name = item_name.trim();
509    if item_name.is_empty() {
510        return None;
511    }
512    let kinds = "struct|trait|enum|fn|type|macro|attr|constant|derive|union|primitive";
513    let pattern = format!(
514        "href=\"((?:[^\"]*/)?(?:{kinds})\\.{}\\.html)\"",
515        regex::escape(item_name)
516    );
517    let re = regex::Regex::new(&pattern).ok()?;
518    let href = re.captures(all_html)?.get(1)?.as_str();
519
520    let krate = crate_name.replace('-', "_");
521    if is_rust_std_crate(crate_name) {
522        // std/core/alloc docs live on doc.rust-lang.org, not docs.rs; the
523        // all.html index there links relative to the crate root.
524        let base = rust_lang_docs_base(&krate, version);
525        return Some(format!("{base}{href}"));
526    }
527    let base_url = docs_rs_base_url();
528    let ver = version.unwrap_or("latest");
529    Some(format!("{base_url}/{crate_name}/{ver}/{krate}/{href}"))
530}
531
532/// Build crates.io API search URL
533#[must_use]
534pub fn build_crates_io_search_url(query: &str, sort: Option<&str>, limit: Option<usize>) -> String {
535    let base_url = crates_io_base_url();
536    let sort = sort.unwrap_or("relevance");
537    let limit = limit.unwrap_or(10);
538    format!(
539        "{}/api/v1/crates?q={}&per_page={}&sort={}",
540        base_url,
541        urlencoding::encode(query),
542        limit,
543        urlencoding::encode(sort)
544    )
545}
546
/// Document service
///
/// Provides centralized management of HTTP client (with auto-retry), cache, and document cache.
///
/// # Fields
///
/// - `client`: HTTP client with retry middleware (shared reference for connection pool reuse)
/// - `cache`: Generic cache instance
/// - `doc_cache`: Document-specific cache
pub struct DocService {
    /// HTTP client used for every fetch; held in an `Arc` so it can be shared.
    client: Arc<reqwest_middleware::ClientWithMiddleware>,
    /// Generic cache instance; the constructors also hand a clone of it to
    /// `doc_cache`.
    cache: Arc<dyn Cache>,
    /// Document-specific cache, built from `cache` and a TTL derived from the
    /// cache configuration.
    doc_cache: cache::DocCache,
}
561
562impl DocService {
563    /// Create new document service (with default TTL)
564    ///
565    /// # Arguments
566    ///
567    /// * `cache` - cache instance
568    ///
569    /// # Errors
570    ///
571    /// Returns error if HTTP client creation fails
572    ///
573    /// # Examples
574    ///
575    /// ```rust,no_run
576    /// use std::sync::Arc;
577    /// use crates_docs::tools::docs::DocService;
578    /// use crates_docs::cache::memory::MemoryCache;
579    ///
580    /// let cache = Arc::new(MemoryCache::new(1000));
581    /// let service = DocService::new(cache).expect("Failed to create DocService");
582    /// ```
583    ///
584    /// # Note
585    ///
586    /// This method uses the global HTTP client singleton for connection pool reuse.
587    /// Make sure to call `init_global_http_client()` during server initialization
588    /// for optimal performance.
589    pub fn new(cache: Arc<dyn Cache>) -> crate::error::Result<Self> {
590        Self::with_config(cache, &CacheConfig::default())
591    }
592
593    /// Create new document service (with custom cache config)
594    ///
595    /// # Arguments
596    ///
597    /// * `cache` - cache instance
598    /// * `cache_config` - cache configuration
599    ///
600    /// # Errors
601    ///
602    /// Returns error if HTTP client creation fails
603    ///
604    /// # Note
605    ///
606    /// This method uses the global HTTP client singleton for connection pool reuse.
607    /// If the global client is not initialized, it will be initialized with default config.
608    pub fn with_config(
609        cache: Arc<dyn Cache>,
610        cache_config: &CacheConfig,
611    ) -> crate::error::Result<Self> {
612        let ttl = cache::DocCacheTtl::from_cache_config(cache_config);
613        let doc_cache = cache::DocCache::with_ttl(cache.clone(), ttl);
614        // Use global HTTP client singleton for connection pool reuse
615        let client = crate::utils::get_or_init_global_http_client()?;
616        Ok(Self {
617            client,
618            cache,
619            doc_cache,
620        })
621    }
622
623    /// Create new document service (with full config)
624    ///
625    /// # Arguments
626    ///
627    /// * `cache` - cache instance
628    /// * `cache_config` - cache configuration
629    /// * `perf_config` - performance configuration(used only for initializing global HTTP client if not yet initialized)
630    ///
631    /// # Errors
632    ///
633    /// Returns error if HTTP client creation fails
634    ///
635    /// # Note
636    ///
637    /// This method uses the global HTTP client singleton for connection pool reuse.
638    /// The `perf_config` is used only if the global client hasn't been initialized yet.
639    /// For consistent configuration, call `init_global_http_client()` during server startup.
640    pub fn with_full_config(
641        cache: Arc<dyn Cache>,
642        cache_config: &CacheConfig,
643        _perf_config: &PerformanceConfig,
644    ) -> crate::error::Result<Self> {
645        let ttl = cache::DocCacheTtl::from_cache_config(cache_config);
646        let doc_cache = cache::DocCache::with_ttl(cache.clone(), ttl);
647        // Use global HTTP client singleton for connection pool reuse
648        let client = crate::utils::get_or_init_global_http_client()?;
649        Ok(Self {
650            client,
651            cache,
652            doc_cache,
653        })
654    }
655
656    /// Get HTTP client (with retry middleware)
657    #[must_use]
658    pub fn client(&self) -> &reqwest_middleware::ClientWithMiddleware {
659        &self.client
660    }
661
662    /// Get cache instance
663    #[must_use]
664    pub fn cache(&self) -> &Arc<dyn Cache> {
665        &self.cache
666    }
667
668    /// Get document cache
669    #[must_use]
670    pub fn doc_cache(&self) -> &cache::DocCache {
671        &self.doc_cache
672    }
673
674    /// Fetch HTML content from a URL
675    ///
676    /// This is a shared utility method used by multiple tools to fetch HTML
677    /// from docs.rs and crates.io.
678    ///
679    /// # Arguments
680    ///
681    /// * `url` - The URL to fetch
682    /// * `tool_name` - Optional tool name for better error messages (e.g., "`lookup_crate`", "`lookup_item`")
683    ///
684    /// # Errors
685    ///
686    /// Returns a `CallToolError` if:
687    /// - The HTTP request fails
688    /// - The response status is not successful
689    /// - Reading the response body fails
690    pub async fn fetch_html(
691        &self,
692        url: &str,
693        tool_name: Option<&str>,
694    ) -> Result<String, CallToolError> {
695        let response = self.client.get(url).send().await.map_err(|e| {
696            let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
697            CallToolError::from_message(format!("{prefix}HTTP request failed: {e}"))
698        })?;
699
700        let status = response.status();
701        if !status.is_success() {
702            let error_body = response.text().await.map_err(|e| {
703                let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
704                CallToolError::from_message(format!("{prefix}Failed to read error response: {e}"))
705            })?;
706            let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
707            return Err(CallToolError::from_message(format!(
708                "{prefix}Failed to get documentation: {}",
709                summarize_http_status(status, &error_body)
710            )));
711        }
712
713        response.text().await.map_err(|e| {
714            let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
715            CallToolError::from_message(format!("{prefix}Failed to read response: {e}"))
716        })
717    }
718
719    /// Fetch HTML from `url`, returning `Ok(None)` when the resource does not
720    /// exist (HTTP 404) instead of an error.
721    ///
722    /// This is used to probe candidate docs.rs item URLs where a 404 simply
723    /// means "this item kind does not match" rather than a hard failure.
724    ///
725    /// # Errors
726    ///
727    /// Returns a `CallToolError` if the request fails, the response has a
728    /// non-success status other than 404, or reading the body fails.
729    pub async fn fetch_html_optional(
730        &self,
731        url: &str,
732        tool_name: Option<&str>,
733    ) -> Result<Option<String>, CallToolError> {
734        let response = self.client.get(url).send().await.map_err(|e| {
735            let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
736            CallToolError::from_message(format!("{prefix}HTTP request failed: {e}"))
737        })?;
738
739        let status = response.status();
740        if status == reqwest::StatusCode::NOT_FOUND {
741            return Ok(None);
742        }
743        if !status.is_success() {
744            // Surface a body-read failure instead of masking it with an empty
745            // string (matches `fetch_html` and the documented contract).
746            let error_body = response.text().await.map_err(|e| {
747                let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
748                CallToolError::from_message(format!("{prefix}Failed to read error response: {e}"))
749            })?;
750            let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
751            return Err(CallToolError::from_message(format!(
752                "{prefix}Failed to get documentation: {}",
753                summarize_http_status(status, &error_body)
754            )));
755        }
756
757        let body = response.text().await.map_err(|e| {
758            let prefix = tool_name.map_or(String::new(), |n| format!("[{n}] "));
759            CallToolError::from_message(format!("{prefix}Failed to read response: {e}"))
760        })?;
761        Ok(Some(body))
762    }
763
764    /// Create new document service with custom HTTP client (for testing)
765    #[must_use]
766    pub fn with_custom_client(
767        cache: Arc<dyn Cache>,
768        cache_config: &CacheConfig,
769        client: Arc<reqwest_middleware::ClientWithMiddleware>,
770    ) -> Self {
771        let ttl = cache::DocCacheTtl::from_cache_config(cache_config);
772        let doc_cache = cache::DocCache::with_ttl(cache.clone(), ttl);
773        Self {
774            client,
775            cache,
776            doc_cache,
777        }
778    }
779}
780
/// Builds a `DocService` without a caller-supplied cache or HTTP client.
impl Default for DocService {
    /// Delegates to `try_default_with_fallback`. `Default::default` has no way
    /// to return an error, so construction goes through that helper, which
    /// substitutes a simpler HTTP client when the configured one cannot be
    /// built.
    fn default() -> Self {
        // Try to create with fallible initialization
        Self::try_default_with_fallback()
    }
}
787
788impl DocService {
789    /// Create `DocService` with default settings using fallible initialization
790    ///
791    /// This method attempts to create a fully configured HTTP client.
792    /// If that fails, it falls back to a basic client without retry middleware.
793    /// The fallback uses `Client::new()` which is infallible.
794    fn try_default_with_fallback() -> Self {
795        let cache = Arc::new(crate::cache::memory::MemoryCache::new(1000));
796        let cache_config = CacheConfig::default();
797
798        // Try to create client with full configuration (may fail in extreme cases)
799        let client: Arc<reqwest_middleware::ClientWithMiddleware> =
800            if let Ok(c) = crate::utils::HttpClientBuilder::new().build() {
801                Arc::new(c)
802            } else {
803                // Fallback: create a minimal client without retry middleware.
804                // Apply timeouts matching HttpClientBuilder's defaults so the
805                // fallback cannot hang forever on a slow/stalled connection.
806                // If the builder fails for any reason, fall back to the
807                // infallible Client::new() (which never panics).
808                let plain_client = reqwest::Client::builder()
809                    .timeout(std::time::Duration::from_secs(30))
810                    .connect_timeout(std::time::Duration::from_secs(10))
811                    .build()
812                    .unwrap_or_else(|_| reqwest::Client::new());
813                Arc::new(reqwest_middleware::ClientBuilder::new(plain_client).build())
814            };
815
816        let ttl = cache::DocCacheTtl::from_cache_config(&cache_config);
817        let doc_cache = cache::DocCache::with_ttl(cache.clone(), ttl);
818
819        Self {
820            client,
821            cache,
822            doc_cache,
823        }
824    }
825}
826
827/// Re-export tool types
828pub use lookup_crate::LookupCrateTool;
829pub use lookup_item::LookupItemTool;
830pub use search::SearchCratesTool;
831
832/// Re-export cache types
833pub use cache::DocCacheTtl;
834
#[cfg(test)]
mod tests {
    // Unit tests for the argument validators, URL builders, format parsing,
    // HTTP status summaries and `DocService` construction used in this module.
    use super::*;

    /// Every `Format` variant, passed as the allowed set when a test wants to
    /// check the string -> `Format` mapping without any per-tool restriction.
    const ALL: &[Format] = &[Format::Markdown, Format::Text, Format::Html, Format::Json];

    /// Environment variable the docs.rs URL tests pin to a known base URL
    /// (presumably read by the `build_docs_*` helpers; verify at the callee).
    const DOCS_RS_URL_VAR: &str = "CRATES_DOCS_DOCS_RS_URL";

    /// Environment variable the crates.io URL tests pin to a known base URL
    /// (presumably read by `build_crates_io_search_url`; verify at the callee).
    const CRATES_IO_URL_VAR: &str = "CRATES_DOCS_CRATES_IO_URL";

    /// Sets `key` to `value`, runs `body`, then removes `key`.
    ///
    /// The variable is removed only when `body` returns normally; if an
    /// assertion inside `body` panics the variable is left set.
    fn with_env_var(key: &str, value: &str, body: impl FnOnce()) {
        std::env::set_var(key, value);
        body();
        std::env::remove_var(key);
    }

    // ---- crate name validation ----

    #[test]
    fn test_validate_crate_name_accepts_valid() {
        for name in ["serde", "serde_json", "tracing-subscriber", "  tokio  "] {
            assert!(
                validate_crate_name("lookup_crate", name).is_ok(),
                "{name:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_validate_crate_name_rejects_invalid() {
        let too_long = "a".repeat(65);
        let rejected = [
            "",
            "   ",
            "../etc/passwd",
            "foo/bar",
            "foo bar",
            "foo;rm",
            too_long.as_str(),
        ];
        for name in rejected {
            assert!(
                validate_crate_name("lookup_crate", name).is_err(),
                "{name:?} should be rejected"
            );
        }
    }

    // ---- version normalization / validation ----

    #[test]
    fn test_normalize_version_strips_leading_v() {
        let cases = [
            ("v1.2.3", "1.2.3"),
            ("V2.0.0", "2.0.0"),
            ("  v1.0  ", "1.0"),
            // Already canonical / non-version identifiers are untouched.
            ("1.0.0", "1.0.0"),
            ("latest", "latest"),
            // A leading 'v' not followed by a digit is part of the identifier.
            ("vendored", "vendored"),
            ("v", "v"),
        ];
        for (raw, expected) in cases {
            assert_eq!(normalize_version(raw), expected, "input: {raw:?}");
        }
    }

    #[test]
    fn test_validate_version_accepts_valid() {
        let accepted = [
            None,
            Some("1.0.0"),
            Some("1.0.0-rc.1"),
            Some("1.0.0+build.5"),
            Some("latest"),
            Some("  1.2.3  "),
        ];
        for version in accepted {
            assert!(
                validate_version("lookup_crate", version).is_ok(),
                "{version:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_validate_version_rejects_invalid() {
        let too_long = "1".repeat(65);
        let rejected = ["", "../../1.0", "1.0/2.0", "1.0 0", "..", too_long.as_str()];
        for version in rejected {
            assert!(
                validate_version("lookup_crate", Some(version)).is_err(),
                "{version:?} should be rejected"
            );
        }
    }

    // ---- item path validation ----

    #[test]
    fn test_validate_item_path_accepts_valid() {
        let accepted = [
            "Serialize",
            "serde::Serialize",
            "std::vec::Vec::push",
            "collections::HashMap",
            "u32",
            "  tokio::main  ",
        ];
        for path in accepted {
            assert!(
                validate_item_path("lookup_item", path).is_ok(),
                "{path:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_validate_item_path_rejects_invalid() {
        let too_long = "a".repeat(257);
        let rejected = [
            "",
            "   ",
            "../../etc/passwd",
            "serde/Serialize",
            "serde::Ser ialize",
            "foo;rm",
            "foo.bar",
            too_long.as_str(),
            // Single-colon separators and empty path segments are malformed.
            "serde:Serialize",
            "serde::",
            "::Serialize",
            "std:::vec",
        ];
        for path in rejected {
            assert!(
                validate_item_path("lookup_item", path).is_err(),
                "{path:?} should be rejected"
            );
        }
    }

    // ---- search query validation ----

    #[test]
    fn test_validate_search_query_accepts_valid() {
        let at_limit = "a".repeat(200);
        for query in ["serde", "web framework", "  tokio  ", at_limit.as_str()] {
            assert!(
                validate_search_query("search_crates", query).is_ok(),
                "{query:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_validate_search_query_rejects_invalid() {
        let over_limit = "a".repeat(201);
        for query in ["", "   ", over_limit.as_str()] {
            assert!(
                validate_search_query("search_crates", query).is_err(),
                "{query:?} should be rejected"
            );
        }
    }

    // ---- item URL candidates ----

    #[test]
    fn test_item_url_candidates_include_attr_macro() {
        // Attribute proc-macros (e.g. async-trait's #[async_trait]) are served
        // from attr.<name>.html, so that page has to be one of the candidates.
        let candidates = build_docs_item_url_candidates("async-trait", None, "async_trait");
        let has_attr_page = candidates
            .iter()
            .any(|url| url.ends_with("/async_trait/attr.async_trait.html"));
        assert!(has_attr_page, "missing attr candidate: {candidates:?}");
    }

    #[test]
    fn test_item_url_candidates_strip_redundant_crate_segment() {
        let candidates = build_docs_item_url_candidates("serde", None, "serde::Serialize");
        let expected_suffixes = [
            "/serde/latest/serde/trait.Serialize.html",
            "/serde/latest/serde/struct.Serialize.html",
        ];
        for suffix in expected_suffixes {
            assert!(
                candidates.iter().any(|url| url.ends_with(suffix)),
                "no candidate ends with {suffix:?}"
            );
        }
        // The module fallback candidate comes last.
        let fallback = candidates.last().unwrap();
        assert!(fallback.ends_with("/serde/latest/serde/Serialize/index.html"));
    }

    #[test]
    fn test_item_url_candidates_nested_module_and_version() {
        let candidates = build_docs_item_url_candidates("serde", Some("1.0.0"), "de::Deserializer");
        let found = candidates
            .iter()
            .any(|url| url.ends_with("/serde/1.0.0/serde/de/trait.Deserializer.html"));
        assert!(found);
    }

    #[test]
    fn test_item_url_candidates_hyphen_crate_uses_underscore_path() {
        let candidates = build_docs_item_url_candidates("serde-with", None, "As");
        // The first path component keeps the crate name as given; the library
        // path component uses underscores instead of hyphens.
        let found = candidates
            .iter()
            .any(|url| url.ends_with("/serde-with/latest/serde_with/struct.As.html"));
        assert!(found);
    }

    #[test]
    fn test_item_url_candidates_empty_path() {
        let candidates = build_docs_item_url_candidates("serde", None, "   ");
        assert!(candidates.is_empty());
    }

    #[test]
    fn test_all_items_url() {
        let cases = [
            ("tokio", None, "https://docs.rs/tokio/latest/tokio/all.html"),
            (
                "foo-bar",
                Some("1.2.3"),
                "https://docs.rs/foo-bar/1.2.3/foo_bar/all.html",
            ),
        ];
        for (krate, version, expected) in cases {
            assert_eq!(build_docs_all_items_url(krate, version), expected);
        }
    }

    // ---- standard library crates ----

    #[test]
    fn test_is_rust_std_crate() {
        let std_family = ["std", "core", "alloc", "proc_macro", "proc-macro", "test"];
        let third_party = ["serde", "tokio", "anyhow", "stdweb"];

        for name in std_family {
            assert!(is_rust_std_crate(name), "{name} should be a std crate");
        }
        for name in third_party {
            assert!(!is_rust_std_crate(name), "{name} should not be a std crate");
        }
    }

    #[test]
    fn test_std_crate_honors_explicit_version() {
        // doc.rust-lang.org serves versioned docs, so an explicit version must
        // not be silently dropped for std-family crates.
        let pinned = Some("1.75.0");
        assert_eq!(
            build_docs_url("std", pinned),
            "https://doc.rust-lang.org/1.75.0/std/"
        );
        assert_eq!(
            build_docs_all_items_url("core", pinned),
            "https://doc.rust-lang.org/1.75.0/core/all.html"
        );

        let candidates = build_docs_item_url_candidates("std", pinned, "collections::HashMap");
        let versioned_page =
            String::from("https://doc.rust-lang.org/1.75.0/std/collections/struct.HashMap.html");
        assert!(
            candidates.contains(&versioned_page),
            "versioned std candidate missing: {candidates:?}"
        );

        // "latest" and None fall back to the unversioned current docs.
        assert_eq!(
            build_docs_url("std", Some("latest")),
            "https://doc.rust-lang.org/std/"
        );
    }

    #[test]
    fn test_std_crate_uses_rust_lang_host() {
        // The crate page, the item candidates and all.html for std crates must
        // all target doc.rust-lang.org (they are not published to docs.rs).
        assert_eq!(build_docs_url("std", None), "https://doc.rust-lang.org/std/");
        assert_eq!(
            build_docs_all_items_url("core", None),
            "https://doc.rust-lang.org/core/all.html"
        );

        let candidates = build_docs_item_url_candidates("std", None, "collections::HashMap");
        let all_on_rust_lang = candidates
            .iter()
            .all(|url| url.starts_with("https://doc.rust-lang.org/std/collections/"));
        assert!(
            all_on_rust_lang,
            "candidates not on rust-lang host: {candidates:?}"
        );

        let hashmap_page =
            String::from("https://doc.rust-lang.org/std/collections/struct.HashMap.html");
        assert!(
            candidates.contains(&hashmap_page),
            "missing HashMap struct candidate: {candidates:?}"
        );
    }

    // ---- all.html fallback lookup ----

    #[test]
    fn test_find_item_url_in_all_html_reexport() {
        let listing = r#"<a href="task/fn.spawn.html">task::spawn</a>"#;
        let found = find_item_url_in_all_html("tokio", None, listing, "spawn");
        assert_eq!(
            found.as_deref(),
            Some("https://docs.rs/tokio/latest/tokio/task/fn.spawn.html")
        );
    }

    #[test]
    fn test_find_item_url_in_all_html_root_struct() {
        let listing = r#"<a href="struct.Builder.html">Builder</a>"#;
        let found = find_item_url_in_all_html("foo", Some("0.1.0"), listing, "Builder");
        assert_eq!(
            found.as_deref(),
            Some("https://docs.rs/foo/0.1.0/foo/struct.Builder.html")
        );
    }

    #[test]
    fn test_find_item_url_in_all_html_std_routes_to_rust_lang() {
        // std/core/alloc re-export fallbacks must target doc.rust-lang.org,
        // not docs.rs (which always 404s for the standard library).
        let std_listing = r#"<a href="task/fn.spawn.html">task::spawn</a>"#;
        let std_found = find_item_url_in_all_html("std", None, std_listing, "spawn");
        assert_eq!(
            std_found.as_deref(),
            Some("https://doc.rust-lang.org/std/task/fn.spawn.html")
        );

        // An explicit version is honored and embedded in the path
        // (doc.rust-lang.org/{version}/{krate}/...).
        let core_listing = r#"<a href="future/trait.Future.html">Future</a>"#;
        let core_found = find_item_url_in_all_html("core", Some("1.0.0"), core_listing, "Future");
        assert_eq!(
            core_found.as_deref(),
            Some("https://doc.rust-lang.org/1.0.0/core/future/trait.Future.html")
        );
    }

    #[test]
    fn test_find_item_url_in_all_html_no_match() {
        let listing = r#"<a href="struct.Other.html">Other</a>"#;
        for item in ["spawn", ""] {
            assert!(
                find_item_url_in_all_html("foo", None, listing, item).is_none(),
                "{item:?} should not resolve"
            );
        }
    }

    // ---- HTTP status summaries ----

    #[test]
    fn test_summarize_http_status_not_found() {
        let body = "<!DOCTYPE html><html><body>The requested crate does not exist</body></html>";
        let summary = summarize_http_status(reqwest::StatusCode::NOT_FOUND, body);

        for expected in ["HTTP 404 Not Found", "does not exist on docs.rs"] {
            assert!(summary.contains(expected), "missing {expected:?}");
        }
        // The full HTML body must never be echoed back.
        for leaked in ["<html", "<!DOCTYPE"] {
            assert!(!summary.contains(leaked), "leaked {leaked:?}");
        }
    }

    #[test]
    fn test_summarize_http_status_hides_html_body() {
        let summary = summarize_http_status(
            reqwest::StatusCode::INTERNAL_SERVER_ERROR,
            "<html><body>boom</body></html>",
        );
        assert_eq!(summary, "HTTP 500 Internal Server Error");
    }

    #[test]
    fn test_summarize_http_status_includes_short_plain_body() {
        let summary = summarize_http_status(reqwest::StatusCode::BAD_GATEWAY, "upstream timeout");
        assert_eq!(summary, "HTTP 502 Bad Gateway - upstream timeout");
    }

    #[test]
    fn test_summarize_http_status_empty_body() {
        let summary = summarize_http_status(reqwest::StatusCode::SERVICE_UNAVAILABLE, "   ");
        assert_eq!(summary, "HTTP 503 Service Unavailable");
    }

    // ---- DocService construction ----

    #[test]
    fn test_doc_service_default() {
        // The HTTP client is always available once the service exists.
        let svc = DocService::default();
        let _ = svc.client();
    }

    #[test]
    fn test_doc_service_accessors() {
        let svc = DocService::default();
        let _ = svc.client();
        let _ = svc.client();
        let _ = svc.cache();
        let _ = svc.doc_cache();
    }

    // ---- format parsing ----

    #[test]
    fn test_parse_format_none() {
        let parsed = parse_format("lookup_crate", None, ALL).unwrap();
        assert_eq!(parsed, Format::Markdown);
    }

    #[test]
    fn test_parse_format_markdown() {
        for spelling in ["markdown", "MARKDOWN", "Markdown"] {
            let parsed = parse_format("lookup_crate", Some(spelling), ALL).unwrap();
            assert_eq!(parsed, Format::Markdown, "input: {spelling:?}");
        }
    }

    #[test]
    fn test_parse_format_text() {
        for spelling in ["text", "TEXT"] {
            let parsed = parse_format("lookup_crate", Some(spelling), ALL).unwrap();
            assert_eq!(parsed, Format::Text, "input: {spelling:?}");
        }
    }

    #[test]
    fn test_parse_format_html() {
        for spelling in ["html", "HTML"] {
            let parsed = parse_format("lookup_crate", Some(spelling), ALL).unwrap();
            assert_eq!(parsed, Format::Html, "input: {spelling:?}");
        }
    }

    #[test]
    fn test_parse_format_json() {
        for spelling in ["json", "JSON"] {
            let parsed = parse_format("lookup_crate", Some(spelling), ALL).unwrap();
            assert_eq!(parsed, Format::Json, "input: {spelling:?}");
        }
    }

    #[test]
    fn test_parse_format_trims_whitespace() {
        // Surrounding whitespace is tolerated (consistent with sort
        // normalization), so e.g. " markdown " parses like "markdown".
        let padded = [(" markdown ", Format::Markdown), ("\tjson\n", Format::Json)];
        for (raw, expected) in padded {
            let parsed = parse_format("lookup_crate", Some(raw), ALL).unwrap();
            assert_eq!(parsed, expected, "input: {raw:?}");
        }
        // Whitespace-only input still trims to empty and is rejected.
        assert!(parse_format("lookup_crate", Some("   "), ALL).is_err());
    }

    #[test]
    fn test_parse_format_invalid() {
        for raw in ["invalid", "xml", ""] {
            assert!(
                parse_format("lookup_crate", Some(raw), ALL).is_err(),
                "{raw:?} should be rejected"
            );
        }
    }

    #[test]
    fn test_parse_format_rejects_unsupported_for_tool() {
        // `html` is a valid format string but search does not support it. The
        // error must list only the formats search accepts and must not
        // over-advertise html.
        let msg = parse_format("search_crates", Some("html"), SEARCH_FORMATS)
            .unwrap_err()
            .to_string();
        assert!(
            msg.contains("This tool supports: markdown, text, json"),
            "got: {msg}"
        );
        assert!(!msg.contains("text, html"), "over-advertises html: {msg}");

        // `json` is valid but unsupported by the doc lookup tools.
        let msg = parse_format("lookup_crate", Some("json"), DOC_FORMATS)
            .unwrap_err()
            .to_string();
        assert!(
            msg.contains("This tool supports: markdown, text, html"),
            "got: {msg}"
        );

        // Unknown formats are rejected against the same per-tool allowed list.
        let msg = parse_format("search_crates", Some("xml"), SEARCH_FORMATS)
            .unwrap_err()
            .to_string();
        assert!(msg.contains("markdown, text, json"), "got: {msg}");

        // Supported formats still parse.
        let search_json = parse_format("search_crates", Some("json"), SEARCH_FORMATS).unwrap();
        assert_eq!(search_json, Format::Json);
        let lookup_html = parse_format("lookup_crate", Some("html"), DOC_FORMATS).unwrap();
        assert_eq!(lookup_html, Format::Html);
    }

    #[test]
    fn test_format_display() {
        let rendered = [
            (Format::Markdown, "markdown"),
            (Format::Text, "text"),
            (Format::Html, "html"),
            (Format::Json, "json"),
        ];
        for (format, expected) in rendered {
            assert_eq!(format.to_string(), expected);
        }
    }

    #[test]
    fn test_format_default() {
        let default_format = Format::default();
        assert_eq!(default_format, Format::Markdown);
    }

    // ---- URL building ----

    #[test]
    fn test_build_docs_url_without_version() {
        with_env_var(DOCS_RS_URL_VAR, "https://docs.rs", || {
            assert_eq!(build_docs_url("serde", None), "https://docs.rs/serde/");
        });
    }

    #[test]
    fn test_build_docs_url_with_version() {
        with_env_var(DOCS_RS_URL_VAR, "https://docs.rs", || {
            assert_eq!(
                build_docs_url("serde", Some("1.0.0")),
                "https://docs.rs/serde/1.0.0/"
            );
        });
    }

    #[test]
    fn test_build_docs_item_url_without_version() {
        with_env_var(DOCS_RS_URL_VAR, "https://docs.rs", || {
            assert_eq!(
                build_docs_item_url("serde", None, "Serialize"),
                "https://docs.rs/serde/?search=Serialize"
            );
        });
    }

    #[test]
    fn test_build_docs_item_url_with_version() {
        with_env_var(DOCS_RS_URL_VAR, "https://docs.rs", || {
            assert_eq!(
                build_docs_item_url("serde", Some("1.0.0"), "Serialize"),
                "https://docs.rs/serde/1.0.0/?search=Serialize"
            );
        });
    }

    #[test]
    fn test_build_docs_item_url_encodes_special_chars() {
        with_env_var(DOCS_RS_URL_VAR, "https://docs.rs", || {
            let item_url = build_docs_item_url("std", None, "collections::HashMap");
            assert!(item_url.contains("collections%3A%3AHashMap"));
        });
    }

    #[test]
    fn test_build_crates_io_search_url_defaults() {
        with_env_var(CRATES_IO_URL_VAR, "https://crates.io", || {
            let search_url = build_crates_io_search_url("web framework", None, None);
            assert!(search_url.contains("crates.io/api/v1/crates"));
            // Either form-style ("+") or percent ("%20") space encoding is fine.
            let query_encoded =
                search_url.contains("q=web+framework") || search_url.contains("q=web%20framework");
            assert!(query_encoded);
            for fragment in ["per_page=10", "sort=relevance"] {
                assert!(search_url.contains(fragment), "missing {fragment:?}");
            }
        });
    }

    #[test]
    fn test_build_crates_io_search_url_with_params() {
        with_env_var(CRATES_IO_URL_VAR, "https://crates.io", || {
            let search_url = build_crates_io_search_url("async", Some("downloads"), Some(20));
            let fragments = [
                "crates.io/api/v1/crates",
                "q=async",
                "per_page=20",
                "sort=downloads",
            ];
            for fragment in fragments {
                assert!(search_url.contains(fragment), "missing {fragment:?}");
            }
        });
    }

    #[test]
    fn test_build_crates_io_search_url_encodes_query() {
        with_env_var(CRATES_IO_URL_VAR, "https://crates.io", || {
            let search_url = build_crates_io_search_url("web framework", None, None);
            let space_encoded =
                search_url.contains("web+framework") || search_url.contains("web%20framework");
            assert!(space_encoded);
        });
    }

    #[test]
    fn test_validation_errors_report_their_tool_name() {
        // Regression: argument-validation errors must name the *tool*
        // (e.g. "lookup_crate"), not the offending field (e.g. "crate_name"),
        // so callers see "Invalid arguments for tool 'lookup_crate'".
        let msg = validate_crate_name("lookup_crate", "../etc/passwd")
            .unwrap_err()
            .to_string();
        assert!(
            msg.contains("lookup_crate"),
            "expected tool name in error, got: {msg}"
        );

        let msg = validate_version("lookup_crate", Some("1.0/2.0"))
            .unwrap_err()
            .to_string();
        assert!(msg.contains("lookup_crate"), "got: {msg}");

        let msg = validate_item_path("lookup_item", "foo/bar")
            .unwrap_err()
            .to_string();
        assert!(msg.contains("lookup_item"), "got: {msg}");

        let msg = validate_search_query("search_crates", "")
            .unwrap_err()
            .to_string();
        assert!(msg.contains("search_crates"), "got: {msg}");

        let msg = parse_format("lookup_crate", Some("xml"), ALL)
            .unwrap_err()
            .to_string();
        assert!(msg.contains("lookup_crate"), "got: {msg}");
    }
}