linkcheck/lib.rs
1//! A library for extracting and validating links.
2//!
3//! The majority of this code has been extracted from the
4//! [`mdbook-linkcheck`](https://crates.io/crates/mdbook-linkcheck) plugin, so
5//! it may have some bias towards the way `mdbook` works.
6//!
7//! # Examples
8//!
9//! If you were validating links in batches, this is one way to go about it:
10//!
11//! ```rust
12//! use linkcheck::{Link, BasicContext};
13//! use std::path::Path;
14//! use codespan::Files;
15//!
16//! # #[tokio::main] async fn main() {
17//! // first we need somewhere to put the source documents we'll be checking
18//! let mut files = Files::new();
19//!
20//! // then we add some items
21//! let src = r#"
22//! This is some markdown linking to [a website](https://example.com) and
23//! [a file](./README.md).
24//! "#;
25//! let file_id = files.add("blah.md", src);
26//!
27//! // we then need to extract all the links and their location in the document
28//! let links = linkcheck::scanners::markdown(src);
29//!
30//! // at the moment we just have a stream of (&str, Span)... To give nice
31//! // diagnostics we need to turn this into a stream of Links that know which
32//! // document they came from.
33//! let links = links.map(|(url, span)| Link::new(url, span, file_id));
34//!
35//! // we've collected all our links, now it's time for validation!
36//!
37//! // when validating file links we need to know what the current directory is
38//! let current_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
39//!
//! // the validation process also needs some contextual information (e.g. HTTP
//! // client, file system validation options, and a cache for expensive web
//! // requests).
43//! //
44//! // Basic users won't need to tweak this in any way, so a default context
45//! // type has been provided for you.
46//! let ctx = BasicContext::default();
47//!
48//! // and now we can run the validation step!
49//! let result = linkcheck::validate(current_dir, links, &ctx).await;
50//!
51//! assert!(result.invalid.is_empty());
52//! assert_eq!(result.valid.len(), 2);
53//! # }
54//! ```
55//!
56//! # Cargo Features
57//!
58//! Extra functionality is accessible by enabling feature flags. The features
59//! currently available are:
60//!
61//! * **serde-1** - Adds `Serialize` and `Deserialize` implementations for use
62//! with `serde`
63
64#![forbid(unsafe_code)]
65#![deny(
66 missing_docs,
67 missing_debug_implementations,
68 missing_copy_implementations
69)]
70
71#[cfg(test)]
72#[macro_use]
73extern crate pretty_assertions;
74
75pub mod scanners;
76pub mod validation;
77
78pub use validation::{validate, BasicContext};
79
80use codespan::{FileId, Span};
81use http::uri::PathAndQuery;
82use std::path::PathBuf;
83use url::Url;
84
85#[derive(Debug, Clone, PartialEq, Eq)]
86enum Category {
87 /// A local file.
88 FileSystem {
89 path: PathBuf,
90 fragment: Option<String>,
91 },
92 /// A link to somewhere else in the current document.
93 CurrentFile { fragment: String },
94 /// A URL for something on the web.
95 Url(Url),
96 /// A `mailto:` link.
97 MailTo(String),
98}
99
100impl Category {
101 fn categorise(src: &str) -> Option<Self> {
102 if src.is_empty() {
103 return None;
104 }
105
106 let mailto_prefix = "mailto:";
107 if src.starts_with(mailto_prefix) {
108 let address = &src[mailto_prefix.len()..];
109 return Some(Category::MailTo(address.to_string()));
110 }
111
112 if let Ok(url) = src.parse() {
113 return Some(Category::Url(url));
114 }
115
116 if src.starts_with("#") {
117 return Some(Category::CurrentFile {
118 fragment: String::from(&src[1..]),
119 });
120 }
121
122 let (path, fragment) = match src.find("#") {
123 Some(hash) => {
124 let (path, rest) = src.split_at(hash);
125 (path, Some(String::from(&rest[1..])))
126 },
127 None => (src, None),
128 };
129
130 // as a sanity check we use the http crate's PathAndQuery type to make
131 // sure the path is decoded correctly
132 if let Ok(path_and_query) = path.parse::<PathAndQuery>() {
133 return Some(Category::FileSystem {
134 path: PathBuf::from(path_and_query.path()),
135 fragment,
136 });
137 }
138
139 None
140 }
141}
142
143/// A link to some other resource.
144#[derive(Debug, Clone, PartialEq)]
145#[cfg_attr(feature = "serde-1", derive(serde::Serialize, serde::Deserialize))]
146#[non_exhaustive]
147pub struct Link {
148 /// The link itself.
149 pub href: String,
150 /// Where the [`Link`] lies in its source text.
151 pub span: Span,
152 /// Which document does this [`Link`] belong to?
153 pub file: FileId,
154}
155
156impl Link {
157 /// Create a new [`Link`].
158 pub fn new<S: Into<String>>(href: S, span: Span, file: FileId) -> Self {
159 Link {
160 href: href.into(),
161 span,
162 file,
163 }
164 }
165
166 fn category(&self) -> Option<Category> { Category::categorise(&self.href) }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check that `Category::categorise()` maps each kind of
    /// `href` to the expected [`Category`] (or to `None`).
    #[test]
    fn parse_into_categories() {
        let inputs = vec![
            // an empty href isn't a link at all
            ("", None),
            (
                "https://example.com/",
                Some(Category::Url(
                    Url::parse("https://example.com/").unwrap(),
                )),
            ),
            (
                "README.md",
                Some(Category::FileSystem {
                    path: PathBuf::from("README.md"),
                    fragment: None,
                }),
            ),
            (
                "./README.md",
                Some(Category::FileSystem {
                    path: PathBuf::from("./README.md"),
                    fragment: None,
                }),
            ),
            (
                "./README.md#license",
                Some(Category::FileSystem {
                    path: PathBuf::from("./README.md"),
                    fragment: Some(String::from("license")),
                }),
            ),
            // a bare fragment points somewhere in the current document
            (
                "#license",
                Some(Category::CurrentFile {
                    fragment: String::from("license"),
                }),
            ),
            (
                "mailto:michael@example.com",
                Some(Category::MailTo(String::from("michael@example.com"))),
            ),
        ];

        for (src, should_be) in inputs {
            let got = Category::categorise(src);
            // `{:?}` keeps the input visible in the failure message even
            // when it is an empty string
            assert_eq!(got, should_be, "input: {:?}", src);
        }
    }
}