docsearch/
lib.rs

1//! Use the latest search index from `rustdoc` to find the docs.rs (or stdlib) URL for any item in a
2//! crate by its [simple path](https://doc.rust-lang.org/stable/reference/paths.html#simple-paths).
3//!
4//! # Example
5//!
6//! Please have a look at the [`start_search`] function for an example of how to use this crate, as
7//! it is the main entry point. In addition, you can check out the `examples` directory in the
8//! repository.
9//!
10//! # Feature flags
11//!
//! The following feature flags enable support for older versions of the search index. If they're
//! not enabled, retrieving the [`Index`] for a crate might fail. These should be enabled or
//! disabled based on which crates will be searched for (if known).
15//!
16//! The features listed are **enabled by default**.
17//!
//! - `index-v2` enables support to parse the slightly outdated index format. This is needed if
//!   parsing of older crates that haven't been updated in a while is required.
20//! - `index-v1` enables support for the even older index format. Nowadays it's rarely found and
21//! this is only needed to parse very old crates that haven't been updated in a long while.
22#![forbid(unsafe_code)]
23#![deny(
24    rust_2018_idioms,
25    clippy::all,
26    clippy::pedantic,
27    clippy::print_stderr,
28    clippy::print_stdout
29)]
30#![allow(clippy::missing_errors_doc)]
31
32use std::{borrow::Cow, collections::BTreeMap};
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{Error, Result};
37pub use crate::{simple_path::SimplePath, version::Version};
38
39mod crates;
40pub mod error;
41mod index;
42mod simple_path;
43mod version;
44
/// List of crates in the stdlib index.
///
/// [`start_search`] compares the requested crate name against this list to decide whether the
/// search targets the standard library instead of a regular crate.
pub(crate) const STD_CRATES: &[&str] = &["alloc", "core", "proc_macro", "std", "test"];
47
48/// Parsed crate index that contains the mappings from [`SimplePath`]s to their URL for direct
49/// linking.
50#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
51pub struct Index {
52    /// Name of the crate.
53    pub name: String,
54    /// Version of the crate.
55    pub version: Version,
56    /// Mapping from simple paths to URL paths.
57    pub mapping: BTreeMap<String, String>,
58    /// Whether this index is for the standard library.
59    pub std: bool,
60}
61
62impl Index {
63    #[must_use]
64    pub fn find_link(&self, path: &SimplePath) -> Option<String> {
65        let link = if path.is_crate_only() {
66            path.crate_name()
67        } else {
68            self.mapping.get(path.as_ref())?
69        };
70
71        Some(if self.std {
72            format!("https://doc.rust-lang.org/nightly/{link}")
73        } else {
74            format!("https://docs.rs/{}/{}/{link}", self.name, self.version)
75        })
76    }
77}
78
79/// Search for the given crate name and optionally a fixed version. This is the main entry point to
80/// retrieve an [`Index`] and further query that index for [`SimplePath`]s.
81///
82/// # Example
83///
84/// Download the index for the `anyhow` crate and get the docs.rs link for the `anyhow::Result`
85/// item.
86///
87/// ```no_run
88/// use anyhow::Result;
89/// use docsearch::{SimplePath, Version};
90///
91/// #[tokio::main(flavor = "current_thread")]
92/// async fn main() -> Result<()> {
93///     // First parse the search query into a `SimplePath`. This ensures the query is actually
94///     // usable and allows to provide additional info.
95///     let query = "anyhow::Result".parse::<SimplePath>().unwrap();
96///
97///     // Initiate a new search. It allows to not depend on a specific HTTP crate and instead
98///     // pass the task to the developer (that's you).
99///     let state = docsearch::start_search(query.crate_name(), Version::Latest);
100///     // First, download the HTML page content to find the URL to the search index.
101///     let content = download_url(state.url()).await?;
102///
103///     // Now try to find the the link to the actual search index.
104///     let state = state.find_index(&content)?;
105///     // Next, download the search index content.
106///     let content = download_url(state.url()).await?;
107///
108///     // Lastly, transform the search index content into an `Index` instance, containing all
109///     // information to create webpage links to an item within the scope of the requested crate.
110///     let index = state.transform_index(&content)?;
111///
112///     // Now we can use the index to query for our initial item.
113///     let link = index.find_link(&query).unwrap();
114///
115///     // And print out the resolved web link to it.
116///     println!("{link}");
117///
118///     Ok(())
119/// }
120///
121/// /// Simple helper function to download any HTTP page with `reqwest`, using a normal GET request.
122/// async fn download_url(url: &str) -> Result<String> {
123///     reqwest::Client::builder()
124///         .redirect(reqwest::redirect::Policy::limited(10))
125///         .build()?
126///         .get(url)
127///         .send()
128///         .await?
129///         .error_for_status()?
130///         .text()
131///         .await
132///         .map_err(Into::into)
133/// }
134/// ```
135#[must_use]
136pub fn start_search(name: &str, version: Version) -> SearchPage<'_> {
137    let std = STD_CRATES.contains(&name);
138    let url = crates::get_page_url(std, name, &version);
139
140    SearchPage {
141        name,
142        version,
143        std,
144        url,
145    }
146}
147
148/// Initial state when starting a new search. Use the [`Self::url`] function to get the URL to
149/// download content from. The web page content must then be passed to [`Self::find_index`] to get
150/// to the next state.
151pub struct SearchPage<'a> {
152    name: &'a str,
153    version: Version,
154    std: bool,
155    url: Cow<'static, str>,
156}
157
158impl<'a> SearchPage<'a> {
159    /// URL to content that should be retrieved and passed to [`Self::find_index`].
160    #[must_use]
161    pub fn url(&self) -> &str {
162        &self.url
163    }
164
165    /// Try to find the index in the content downloaded from [`Self::url`], effectively transferring
166    /// to the next state in retrieving an `Index` instance.
167    pub fn find_index(self, body: &str) -> Result<SearchIndex<'a>> {
168        let (version, url) = crates::find_index_url(self.std, self.name, self.version, body)?;
169
170        Ok(SearchIndex {
171            name: self.name,
172            version,
173            std: self.std,
174            url,
175        })
176    }
177}
178
179/// Second and last state in retrieving a search index. Use the [`Self::url`] function to get the
180/// search index URL to download. The index's content must be passed to [`Self::transform_index`] to
181/// create the final [`Index`] instance.
182pub struct SearchIndex<'a> {
183    name: &'a str,
184    version: Version,
185    std: bool,
186    url: String,
187}
188
189impl<'a> SearchIndex<'a> {
190    /// URL to the search index that should be retrieved and passed to [`Self::transform_index`].
191    #[must_use]
192    pub fn url(&self) -> &str {
193        &self.url
194    }
195
196    /// Try to transform the raw index content into a simple "path-to-URL" mapping for each
197    /// contained crate.
198    pub fn transform_index(self, index_content: &str) -> Result<Index> {
199        let mappings = index::load(index_content)?;
200
201        mappings
202            .into_iter()
203            .find(|(crate_name, _)| crate_name == self.name)
204            .map(|(name, mapping)| Index {
205                name,
206                version: self.version.clone(),
207                mapping,
208                std: self.std,
209            })
210            .ok_or(Error::CrateDataMissing)
211    }
212}