docsearch/lib.rs
1//! Use the latest search index from `rustdoc` to find the docs.rs (or stdlib) URL for any item in a
2//! crate by its [simple path](https://doc.rust-lang.org/stable/reference/paths.html#simple-paths).
3//!
4//! # Example
5//!
6//! Please have a look at the [`start_search`] function for an example of how to use this crate, as
7//! it is the main entry point. In addition, you can check out the `examples` directory in the
8//! repository.
9//!
10//! # Feature flags
11//!
//! The following feature flags enable support for older versions of the search index. If they're
//! not enabled, retrieving the [`Index`] for a crate might fail. They should be enabled or
//! disabled depending on which crates will be searched for (if known).
15//!
16//! The features listed are **enabled by default**.
17//!
//! - `index-v2` enables support to parse the slightly outdated index format. This is needed if
//!   parsing of older crates that haven't been updated in a while is required.
20//! - `index-v1` enables support for the even older index format. Nowadays it's rarely found and
21//! this is only needed to parse very old crates that haven't been updated in a long while.
22#![forbid(unsafe_code)]
23#![deny(
24 rust_2018_idioms,
25 clippy::all,
26 clippy::pedantic,
27 clippy::print_stderr,
28 clippy::print_stdout
29)]
30#![allow(clippy::missing_errors_doc)]
31
32use std::{borrow::Cow, collections::BTreeMap};
33
34use serde::{Deserialize, Serialize};
35
36use crate::error::{Error, Result};
37pub use crate::{simple_path::SimplePath, version::Version};
38
39mod crates;
40pub mod error;
41mod index;
42mod simple_path;
43mod version;
44
/// List of crates in the stdlib index.
///
/// [`start_search`] checks the requested crate name against this list to decide whether the
/// search targets the standard library.
pub(crate) const STD_CRATES: &[&str] = &["alloc", "core", "proc_macro", "std", "test"];
47
/// Parsed crate index that contains the mappings from [`SimplePath`]s to their URL for direct
/// linking.
///
/// An instance is created by [`SearchIndex::transform_index`] and can then be queried with
/// [`Index::find_link`].
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Index {
    /// Name of the crate.
    pub name: String,
    /// Version of the crate.
    pub version: Version,
    /// Mapping from simple paths to URL paths. The URL paths are relative: [`Index::find_link`]
    /// appends them to the base URL of the documentation site.
    pub mapping: BTreeMap<String, String>,
    /// Whether this index is for the standard library.
    pub std: bool,
}
61
62impl Index {
63 #[must_use]
64 pub fn find_link(&self, path: &SimplePath) -> Option<String> {
65 let link = if path.is_crate_only() {
66 path.crate_name()
67 } else {
68 self.mapping.get(path.as_ref())?
69 };
70
71 Some(if self.std {
72 format!("https://doc.rust-lang.org/nightly/{link}")
73 } else {
74 format!("https://docs.rs/{}/{}/{link}", self.name, self.version)
75 })
76 }
77}
78
79/// Search for the given crate name and optionally a fixed version. This is the main entry point to
80/// retrieve an [`Index`] and further query that index for [`SimplePath`]s.
81///
82/// # Example
83///
84/// Download the index for the `anyhow` crate and get the docs.rs link for the `anyhow::Result`
85/// item.
86///
87/// ```no_run
88/// use anyhow::Result;
89/// use docsearch::{SimplePath, Version};
90///
91/// #[tokio::main(flavor = "current_thread")]
92/// async fn main() -> Result<()> {
93/// // First parse the search query into a `SimplePath`. This ensures the query is actually
94/// // usable and allows to provide additional info.
95/// let query = "anyhow::Result".parse::<SimplePath>().unwrap();
96///
97/// // Initiate a new search. It allows to not depend on a specific HTTP crate and instead
98/// // pass the task to the developer (that's you).
99/// let state = docsearch::start_search(query.crate_name(), Version::Latest);
100/// // First, download the HTML page content to find the URL to the search index.
101/// let content = download_url(state.url()).await?;
102///
103/// // Now try to find the the link to the actual search index.
104/// let state = state.find_index(&content)?;
105/// // Next, download the search index content.
106/// let content = download_url(state.url()).await?;
107///
108/// // Lastly, transform the search index content into an `Index` instance, containing all
109/// // information to create webpage links to an item within the scope of the requested crate.
110/// let index = state.transform_index(&content)?;
111///
112/// // Now we can use the index to query for our initial item.
113/// let link = index.find_link(&query).unwrap();
114///
115/// // And print out the resolved web link to it.
116/// println!("{link}");
117///
118/// Ok(())
119/// }
120///
121/// /// Simple helper function to download any HTTP page with `reqwest`, using a normal GET request.
122/// async fn download_url(url: &str) -> Result<String> {
123/// reqwest::Client::builder()
124/// .redirect(reqwest::redirect::Policy::limited(10))
125/// .build()?
126/// .get(url)
127/// .send()
128/// .await?
129/// .error_for_status()?
130/// .text()
131/// .await
132/// .map_err(Into::into)
133/// }
134/// ```
135#[must_use]
136pub fn start_search(name: &str, version: Version) -> SearchPage<'_> {
137 let std = STD_CRATES.contains(&name);
138 let url = crates::get_page_url(std, name, &version);
139
140 SearchPage {
141 name,
142 version,
143 std,
144 url,
145 }
146}
147
148/// Initial state when starting a new search. Use the [`Self::url`] function to get the URL to
149/// download content from. The web page content must then be passed to [`Self::find_index`] to get
150/// to the next state.
151pub struct SearchPage<'a> {
152 name: &'a str,
153 version: Version,
154 std: bool,
155 url: Cow<'static, str>,
156}
157
158impl<'a> SearchPage<'a> {
159 /// URL to content that should be retrieved and passed to [`Self::find_index`].
160 #[must_use]
161 pub fn url(&self) -> &str {
162 &self.url
163 }
164
165 /// Try to find the index in the content downloaded from [`Self::url`], effectively transferring
166 /// to the next state in retrieving an `Index` instance.
167 pub fn find_index(self, body: &str) -> Result<SearchIndex<'a>> {
168 let (version, url) = crates::find_index_url(self.std, self.name, self.version, body)?;
169
170 Ok(SearchIndex {
171 name: self.name,
172 version,
173 std: self.std,
174 url,
175 })
176 }
177}
178
179/// Second and last state in retrieving a search index. Use the [`Self::url`] function to get the
180/// search index URL to download. The index's content must be passed to [`Self::transform_index`] to
181/// create the final [`Index`] instance.
182pub struct SearchIndex<'a> {
183 name: &'a str,
184 version: Version,
185 std: bool,
186 url: String,
187}
188
189impl<'a> SearchIndex<'a> {
190 /// URL to the search index that should be retrieved and passed to [`Self::transform_index`].
191 #[must_use]
192 pub fn url(&self) -> &str {
193 &self.url
194 }
195
196 /// Try to transform the raw index content into a simple "path-to-URL" mapping for each
197 /// contained crate.
198 pub fn transform_index(self, index_content: &str) -> Result<Index> {
199 let mappings = index::load(index_content)?;
200
201 mappings
202 .into_iter()
203 .find(|(crate_name, _)| crate_name == self.name)
204 .map(|(name, mapping)| Index {
205 name,
206 version: self.version.clone(),
207 mapping,
208 std: self.std,
209 })
210 .ok_or(Error::CrateDataMissing)
211 }
212}