mdbook_linkcheck2/
lib.rs

1//! An `mdbook` backend which checks that all links in a document are valid.
2//!
3//! The link-checking process has roughly four stages:
4//!
5//! 1. Find all the links in a body of markdown text (see [`extract_links`])
6//! 2. Validate all the links we've found, taking into account cached results
7//!    and configuration options
8//! 3. Cache the results in the output directory for reuse by step 2 in the next
9//!    round
10//! 4. Emit errors/warnings to the user
11
12// Note: older versions of Rust (e.g. v1.46.0) don't know about "rustdoc" lints
13#![allow(unknown_lints)]
14#![deny(
15    rustdoc::broken_intra_doc_links,
16    missing_docs,
17    missing_debug_implementations,
18    missing_copy_implementations
19)]
20
21#[cfg(test)]
22#[macro_use]
23extern crate pretty_assertions;
24
25/// A semver range specifying which versions of `mdbook` this crate supports.
26pub const COMPATIBLE_MDBOOK_VERSIONS: &str = "^0.5.1";
27
28mod config;
29mod context;
30mod hashed_regex;
31mod latex;
32mod links;
33mod validate;
34
35pub use crate::{
36    config::{Config, WarningPolicy},
37    context::Context,
38    hashed_regex::HashedRegex,
39    links::{extract as extract_links, IncompleteLink},
40    validate::{validate, NotInSummary, ValidationOutcome},
41};
42
43use anyhow::{Context as _, Error};
44use codespan::{FileId, Files};
45use codespan_reporting::{
46    diagnostic::{Diagnostic, Severity},
47    term::termcolor::{ColorChoice, StandardStream},
48};
49use linkcheck2::validation::Cache;
50use mdbook_renderer::{
51    book::{Book, BookItem},
52    RenderContext,
53};
54use semver::{Version, VersionReq};
55use std::{fs::File, path::Path};
56
57/// Run the link checking pipeline.
58///
59/// If `selected_files` is `Some`, then web links in the given list of files are
60/// checked, rather than checking links in all files.
61///
62/// If `cache_file` is `Some`, it is used as a cache; otherwise, no caching is
63/// used, and any existing cache is ignored.
64pub fn run(
65    cache_file: Option<&Path>,
66    colour: ColorChoice,
67    ctx: &RenderContext,
68    selected_files: Option<Vec<String>>,
69) -> Result<(), Error> {
70    let mut cache = if let Some(cache_file) = cache_file {
71        load_cache(cache_file)
72    } else {
73        Cache::default()
74    };
75
76    log::info!("Started the link checker");
77    log::debug!("Selected files for web links: {:?}", selected_files);
78
79    let cfg = ctx
80        .config
81        .get("output.linkcheck2")?
82        .unwrap_or_else(|| ctx.config.get("output.linkcheck").ok()?)
83        .unwrap_or_default();
84    crate::version_check(&ctx.version)?;
85
86    if log::log_enabled!(log::Level::Trace) {
87        for line in format!("{:#?}", cfg).lines() {
88            log::trace!("{}", line);
89        }
90    }
91
92    let file_filter = |fname: &Path| {
93        if let Some(ref selected_files) = selected_files {
94            selected_files.contains(&fname.display().to_string())
95        } else {
96            true
97        }
98    };
99
100    let (files, outcome) = check_links(ctx, &mut cache, &cfg, file_filter)?;
101    let diags = outcome.generate_diagnostics(&files, cfg.warning_policy);
102    report_errors(&files, &diags, colour)?;
103
104    if let Some(cache_file) = cache_file {
105        save_cache(cache_file, &cache);
106    }
107
108    if diags.iter().any(|diag| diag.severity >= Severity::Error) {
109        log::info!("{} broken links found", outcome.invalid_links.len());
110        Err(Error::msg("One or more incorrect links"))
111    } else {
112        log::info!("No broken links found");
113        Ok(())
114    }
115}
116
117/// Check whether this library is compatible with the provided version string.
118pub fn version_check(version: &str) -> Result<(), Error> {
119    let constraints = VersionReq::parse(COMPATIBLE_MDBOOK_VERSIONS)?;
120    let found = Version::parse(version)?;
121
122    if constraints.matches(&found) {
123        Ok(())
124    } else {
125        let msg = format!(
126            "mdbook-linkcheck isn't compatible with this version of mdbook ({} is not in the range {})",
127            found, constraints
128        );
129        Err(Error::msg(msg))
130    }
131}
132
133/// A helper for reading the chapters of a [`Book`] into memory.
134pub fn load_files_into_memory<F>(
135    book: &Book,
136    dest: &mut Files<String>,
137    filter: F,
138) -> (Vec<FileId>, Vec<FileId>)
139where
140    F: Fn(&Path) -> bool,
141{
142    let mut filtered_files: Vec<FileId> = Vec::new();
143    let mut all_files: Vec<FileId> = Vec::new();
144
145    for item in book.iter() {
146        match item {
147            BookItem::Chapter(ref ch) => {
148                if let Some(ref path) = ch.path {
149                    let path_str = path.display().to_string();
150                    let content = ch.content.clone();
151                    let id = dest.add(path_str, content);
152                    if filter(path) {
153                        filtered_files.push(id);
154                    }
155                    all_files.push(id);
156                }
157            }
158            BookItem::Separator | BookItem::PartTitle(_) => {}
159        }
160    }
161
162    (filtered_files, all_files)
163}
164
165fn report_errors(
166    files: &Files<String>,
167    diags: &[Diagnostic<FileId>],
168    colour: ColorChoice,
169) -> Result<(), Error> {
170    let mut writer = StandardStream::stderr(colour);
171    let cfg = codespan_reporting::term::Config::default();
172
173    for diag in diags {
174        codespan_reporting::term::emit_to_write_style(&mut writer, &cfg, files, diag)?;
175    }
176
177    Ok(())
178}
179
180fn check_links<F>(
181    ctx: &RenderContext,
182    cache: &mut Cache,
183    cfg: &Config,
184    file_filter: F,
185) -> Result<(Files<String>, ValidationOutcome), Error>
186where
187    F: Fn(&Path) -> bool,
188{
189    log::info!("Scanning book for links");
190    let mut files: Files<String> = Files::new();
191    let (web_check_files_ids, all_file_ids) =
192        crate::load_files_into_memory(&ctx.book, &mut files, file_filter);
193    log::info!(
194        "Loaded {} files, filtered for web checking: {}",
195        all_file_ids.len(),
196        web_check_files_ids.len()
197    );
198    let (links, incomplete_links) = crate::extract_links(cfg, all_file_ids.clone(), &files);
199    log::info!(
200        "Found {} links ({} incomplete links)",
201        links.len(),
202        incomplete_links.len()
203    );
204    let src =
205        dunce::canonicalize(ctx.source_dir()).context("Unable to resolve the source directory")?;
206    let outcome = crate::validate(
207        &links,
208        cfg,
209        &src,
210        cache,
211        &files,
212        &web_check_files_ids,
213        &all_file_ids,
214        incomplete_links,
215    )?;
216
217    Ok((files, outcome))
218}
219
220fn load_cache(filename: &Path) -> Cache {
221    log::debug!("Loading cache from {}", filename.display());
222
223    match File::open(filename) {
224        Ok(f) => match serde_json::from_reader(f) {
225            Ok(cache) => cache,
226            Err(e) => {
227                log::warn!("Unable to deserialize the cache: {}", e);
228                Cache::default()
229            }
230        },
231        Err(e) => {
232            log::debug!("Unable to open the cache: {}", e);
233            Cache::default()
234        }
235    }
236}
237
238fn save_cache(filename: &Path, cache: &Cache) {
239    if let Some(parent) = filename.parent() {
240        if let Err(e) = std::fs::create_dir_all(parent) {
241            log::warn!("Unable to create the cache's directory: {}", e);
242        }
243    }
244
245    log::debug!("Saving the cache to {}", filename.display());
246
247    match File::create(filename) {
248        Ok(f) => {
249            if let Err(e) = serde_json::to_writer(f, cache) {
250                log::warn!("Saving the cache as JSON failed: {}", e);
251            }
252        }
253        Err(e) => log::warn!("Unable to create the cache file: {}", e),
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// The version reported by `mdbook_renderer::MDBOOK_VERSION` must pass
    /// this crate's own compatibility check.
    #[test]
    fn always_stay_compatible_with_mdbook_dependency() {
        if let Err(error) = version_check(mdbook_renderer::MDBOOK_VERSION) {
            panic!("Incompatible with mdbook dependency: {:#?}", error);
        }
    }
}
271}