linkcheck/
lib.rs

1//! A library for extracting and validating links.
2//!
3//! The majority of this code has been extracted from the
4//! [`mdbook-linkcheck`](https://crates.io/crates/mdbook-linkcheck) plugin, so
5//! it may have some bias towards the way `mdbook` works.
6//!
7//! # Examples
8//!
9//! If you were validating links in batches, this is one way to go about it:
10//!
11//! ```rust
12//! use linkcheck::{Link, BasicContext};
13//! use std::path::Path;
14//! use codespan::Files;
15//!
16//! # #[tokio::main] async fn main() {
17//! // first we need somewhere to put the source documents we'll be checking
18//! let mut files = Files::new();
19//!
20//! // then we add some items
21//! let src = r#"
22//! This is some markdown linking to [a website](https://example.com) and
23//! [a file](./README.md).
24//! "#;
25//! let file_id = files.add("blah.md", src);
26//!
27//! // we then need to extract all the links and their location in the document
28//! let links = linkcheck::scanners::markdown(src);
29//!
30//! // at the moment we just have a stream of (&str, Span)... To give nice
31//! // diagnostics we need to turn this into a stream of Links that know which
32//! // document they came from.
33//! let links = links.map(|(url, span)| Link::new(url, span, file_id));
34//!
35//! // we've collected all our links, now it's time for validation!
36//!
37//! // when validating file links we need to know what the current directory is
38//! let current_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
39//!
//! // the validation process also needs some contextual information (e.g. HTTP
//! // client, file system validation options, and a cache for expensive web
//! // requests).
43//! //
44//! // Basic users won't need to tweak this in any way, so a default context
45//! // type has been provided for you.
46//! let ctx = BasicContext::default();
47//!
48//! // and now we can run the validation step!
49//! let result = linkcheck::validate(current_dir, links, &ctx).await;
50//!
51//! assert!(result.invalid.is_empty());
52//! assert_eq!(result.valid.len(), 2);
53//! # }
54//! ```
55//!
56//! # Cargo Features
57//!
58//! Extra functionality is accessible by enabling feature flags. The features
59//! currently available are:
60//!
61//! * **serde-1** - Adds `Serialize` and `Deserialize` implementations for use
62//!   with `serde`
63
64#![forbid(unsafe_code)]
65#![deny(
66    missing_docs,
67    missing_debug_implementations,
68    missing_copy_implementations
69)]
70
71#[cfg(test)]
72#[macro_use]
73extern crate pretty_assertions;
74
75pub mod scanners;
76pub mod validation;
77
78pub use validation::{validate, BasicContext};
79
80use codespan::{FileId, Span};
81use http::uri::PathAndQuery;
82use std::path::PathBuf;
83use url::Url;
84
85#[derive(Debug, Clone, PartialEq, Eq)]
86enum Category {
87    /// A local file.
88    FileSystem {
89        path: PathBuf,
90        fragment: Option<String>,
91    },
92    /// A link to somewhere else in the current document.
93    CurrentFile { fragment: String },
94    /// A URL for something on the web.
95    Url(Url),
96    /// A `mailto:` link.
97    MailTo(String),
98}
99
100impl Category {
101    fn categorise(src: &str) -> Option<Self> {
102        if src.is_empty() {
103            return None;
104        }
105
106        let mailto_prefix = "mailto:";
107        if src.starts_with(mailto_prefix) {
108            let address = &src[mailto_prefix.len()..];
109            return Some(Category::MailTo(address.to_string()));
110        }
111
112        if let Ok(url) = src.parse() {
113            return Some(Category::Url(url));
114        }
115
116        if src.starts_with("#") {
117            return Some(Category::CurrentFile {
118                fragment: String::from(&src[1..]),
119            });
120        }
121
122        let (path, fragment) = match src.find("#") {
123            Some(hash) => {
124                let (path, rest) = src.split_at(hash);
125                (path, Some(String::from(&rest[1..])))
126            },
127            None => (src, None),
128        };
129
130        // as a sanity check we use the http crate's PathAndQuery type to make
131        // sure the path is decoded correctly
132        if let Ok(path_and_query) = path.parse::<PathAndQuery>() {
133            return Some(Category::FileSystem {
134                path: PathBuf::from(path_and_query.path()),
135                fragment,
136            });
137        }
138
139        None
140    }
141}
142
143/// A link to some other resource.
144#[derive(Debug, Clone, PartialEq)]
145#[cfg_attr(feature = "serde-1", derive(serde::Serialize, serde::Deserialize))]
146#[non_exhaustive]
147pub struct Link {
148    /// The link itself.
149    pub href: String,
150    /// Where the [`Link`] lies in its source text.
151    pub span: Span,
152    /// Which document does this [`Link`] belong to?
153    pub file: FileId,
154}
155
156impl Link {
157    /// Create a new [`Link`].
158    pub fn new<S: Into<String>>(href: S, span: Span, file: FileId) -> Self {
159        Link {
160            href: href.into(),
161            span,
162            file,
163        }
164    }
165
166    fn category(&self) -> Option<Category> { Category::categorise(&self.href) }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Each kind of input `Category::categorise()` distinguishes between
    /// should end up in the expected category.
    #[test]
    fn parse_into_categories() {
        // shorthand for the expected result of a link to a local file
        let file = |path: &str, fragment: Option<&str>| {
            Some(Category::FileSystem {
                path: PathBuf::from(path),
                fragment: fragment.map(String::from),
            })
        };

        let inputs: Vec<(&str, Option<Category>)> = vec![
            // an empty string isn't a link at all
            ("", None),
            (
                "https://example.com/",
                Some(Category::Url(
                    Url::parse("https://example.com/").unwrap(),
                )),
            ),
            ("README.md", file("README.md", None)),
            ("./README.md", file("./README.md", None)),
            ("./README.md#license", file("./README.md", Some("license"))),
            // a bare fragment points somewhere inside the current document
            (
                "#license",
                Some(Category::CurrentFile {
                    fragment: String::from("license"),
                }),
            ),
            (
                "mailto:michael@example.com",
                Some(Category::MailTo(String::from("michael@example.com"))),
            ),
        ];

        for (src, should_be) in inputs {
            let got = Category::categorise(src);
            assert_eq!(got, should_be, "{}", src);
        }
    }
}