mdbook_linkcheck/
validate.rs

1use crate::{Config, Context, IncompleteLink, WarningPolicy};
2use anyhow::Error;
3use codespan::{FileId, Files};
4use codespan_reporting::diagnostic::{Diagnostic, Label, Severity};
5use linkcheck::{
6    validation::{Cache, InvalidLink, Options, Outcomes, Reason},
7    Link,
8};
9use std::{
10    collections::HashMap,
11    ffi::{OsStr, OsString},
12    fmt::{self, Display, Formatter},
13    path::{Component, Path, PathBuf},
14    sync::Mutex,
15};
16use tokio::runtime::Builder;
17
18fn lc_validate(
19    links: &[Link],
20    cfg: &Config,
21    src_dir: &Path,
22    cache: &mut Cache,
23    files: &Files<String>,
24    file_ids: &[FileId],
25) -> Outcomes {
26    let file_names = file_ids
27        .iter()
28        .map(|id| files.name(*id).to_os_string())
29        .collect();
30
31    let options = Options::default()
32        .with_root_directory(src_dir)
33        .expect("The source directory doesn't exist?")
34        .set_alternate_extensions(vec![(
35            "html".to_string(),
36            vec!["md".to_string()],
37        )])
38        .set_links_may_traverse_the_root_directory(
39            cfg.traverse_parent_directories,
40        )
41        // take into account the `index` preprocessor which rewrites `README.md`
42        // to `index.md` (which tne gets rendered as `index.html`)
43        .set_default_file("README.md")
44        .set_custom_validation(ensure_included_in_book(src_dir, file_names));
45
46    let interpolated_headers = cfg.interpolate_headers(cfg.warning_policy);
47
48    let ctx = Context {
49        client: cfg.client(),
50        filesystem_options: options,
51        cfg,
52        src_dir,
53        cache: Mutex::new(cache.clone()),
54        files,
55        interpolated_headers,
56    };
57    let links = collate_links(links, src_dir, files);
58
59    let runtime = Builder::new_multi_thread().enable_all().build().unwrap();
60    let got = runtime.block_on(async {
61        let mut outcomes = Outcomes::default();
62
63        for (current_dir, links) in links {
64            outcomes
65                .merge(linkcheck::validate(&current_dir, links, &ctx).await);
66        }
67
68        outcomes
69    });
70
71    // move the cache out of ctx. We'd get a borrowing error if anything was
72    // using it
73    let updated_cache = ctx.cache;
74
75    *cache = updated_cache
76        .into_inner()
77        .expect("We statically know this isn't used");
78    got
79}
80
81fn ensure_included_in_book(
82    src_dir: &Path,
83    file_names: Vec<OsString>,
84) -> impl Fn(&Path, Option<&str>) -> Result<(), Reason> {
85    let src_dir = src_dir.to_path_buf();
86
87    move |resolved_link, _| {
88        let resolved_link = match resolved_link.strip_prefix(&src_dir) {
89            Ok(path) => path,
90            // Not part of the book.
91            Err(_) => return Ok(()),
92        };
93        let was_included_in_summary =
94            file_names.iter().any(|summary_path| {
95                let summary_path = Path::new(summary_path);
96                if summary_path.parent() != resolved_link.parent() {
97                    return false;
98                }
99                match (summary_path.file_name(), resolved_link.file_name()) {
100                    (a, b) if a == b => true,
101                    (Some(summary), Some(resolved)) => {
102                        // index preprocessor rewrites summary paths before we get to them.
103                        summary == Path::new("index.md") && resolved == Path::new("README.md")
104                    }
105                    _ => false,
106                }
107            });
108        let ext = resolved_link.extension();
109        let is_markdown = ext == Some(OsStr::new("md"));
110
111        if was_included_in_summary || !is_markdown {
112            Ok(())
113        } else {
114            use std::io::{Error, ErrorKind};
115
116            Err(Reason::Io(Error::new(
117                ErrorKind::Other,
118                NotInSummary {
119                    path: resolved_link.to_path_buf(),
120                },
121            )))
122        }
123    }
124}
125
/// The error reported when a link points at a file which exists on disk but
/// isn't part of the book.
#[derive(Debug)]
pub struct NotInSummary {
    /// Path of the file that was linked to.
    ///
    /// NOTE(review): the only place in this file that constructs this error
    /// passes a path with the book's source directory stripped off, i.e. a
    /// path relative to the source directory rather than an absolute one.
    pub path: PathBuf,
}

impl Display for NotInSummary {
    /// Render the user-facing message naming the offending path.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let shown = self.path.display();

        f.write_fmt(format_args!(
            "It looks like \"{}\" wasn't included in SUMMARY.md",
            shown
        ))
    }
}

// No underlying cause to expose, so the default trait methods suffice.
impl std::error::Error for NotInSummary {}
145
146fn collate_links<'a>(
147    links: &'a [Link],
148    src_dir: &Path,
149    files: &'a Files<String>,
150) -> impl Iterator<Item = (PathBuf, Vec<linkcheck::Link>)> {
151    let mut links_by_directory: HashMap<PathBuf, Vec<linkcheck::Link>> =
152        HashMap::new();
153
154    for link in links {
155        let mut path = src_dir.join(files.name(link.file));
156        path.pop();
157        links_by_directory
158            .entry(path)
159            .or_default()
160            .push(link.clone());
161    }
162
163    links_by_directory.into_iter()
164}
165
166fn merge_outcomes(
167    outcomes: Outcomes,
168    incomplete_links: Vec<IncompleteLink>,
169) -> ValidationOutcome {
170    // Note: we want to sort all outcomes by file and then its location in that
171    // file.
172    //
173    // That way, when we emit diagnostics they'll be emitted for each file in
174    // the order that it is listed in `SUMMARY.md`, then individual diagnostics
175    // will be emitted from the start of each file to the end.
176    fn sorted<T, F>(mut items: Vec<T>, mut key: F) -> Vec<T>
177    where
178        F: FnMut(&T) -> &Link,
179    {
180        items.sort_by_key(|item| {
181            let link = key(item);
182            (link.file, link.span)
183        });
184        items
185    }
186    fn sorted_link(items: Vec<Link>) -> Vec<Link> { sorted(items, |link| link) }
187
188    ValidationOutcome {
189        invalid_links: sorted(outcomes.invalid, |l| &l.link),
190        ignored: sorted_link(outcomes.ignored),
191        valid_links: sorted_link(outcomes.valid),
192        unknown_category: sorted_link(outcomes.unknown_category),
193        incomplete_links,
194    }
195}
196
197/// Try to validate the provided [`Link`]s.
198pub fn validate(
199    links: &[Link],
200    cfg: &Config,
201    src_dir: &Path,
202    cache: &mut Cache,
203    files: &Files<String>,
204    file_ids: &[FileId],
205    incomplete_links: Vec<IncompleteLink>,
206) -> Result<ValidationOutcome, Error> {
207    let got = lc_validate(links, cfg, src_dir, cache, files, file_ids);
208    Ok(merge_outcomes(got, incomplete_links))
209}
210
211/// The outcome of validating a set of links.
212#[derive(Debug, Default)]
213pub struct ValidationOutcome {
214    /// Valid links.
215    pub valid_links: Vec<Link>,
216    /// Links where validation failed.
217    pub invalid_links: Vec<InvalidLink>,
218    /// Links which have been ignored (e.g. due to
219    /// [`Config::follow_web_links`]).
220    pub ignored: Vec<Link>,
221    /// Links which we don't know how to handle.
222    pub unknown_category: Vec<Link>,
223    /// Potentially incomplete links.
224    pub incomplete_links: Vec<IncompleteLink>,
225}
226
227impl ValidationOutcome {
228    /// Generate a list of [`Diagnostic`] messages from this
229    /// [`ValidationOutcome`].
230    pub fn generate_diagnostics(
231        &self,
232        files: &Files<String>,
233        warning_policy: WarningPolicy,
234    ) -> Vec<Diagnostic<FileId>> {
235        let mut diags = Vec::new();
236
237        self.add_invalid_link_diagnostics(&mut diags);
238        self.add_incomplete_link_diagnostics(warning_policy, &mut diags);
239        self.warn_on_absolute_links(warning_policy, &mut diags, files);
240
241        diags
242    }
243
244    fn add_incomplete_link_diagnostics(
245        &self,
246        warning_policy: WarningPolicy,
247        diags: &mut Vec<Diagnostic<FileId>>,
248    ) {
249        let severity = match warning_policy {
250            WarningPolicy::Error => Severity::Error,
251            WarningPolicy::Warn => Severity::Warning,
252            WarningPolicy::Ignore => return,
253        };
254
255        for incomplete in &self.incomplete_links {
256            let IncompleteLink {
257                ref reference,
258                file,
259                span,
260            } = incomplete;
261
262            let msg =
263                format!("Did you forget to define a URL for `{0}`?", reference);
264            let label = Label::primary(*file, *span).with_message(msg);
265            let note = format!(
266                "hint: declare the link's URL. For example: `[{}]: http://example.com/`",
267                reference
268            );
269
270            let diag = Diagnostic::new(severity)
271                .with_message("Potential incomplete link")
272                .with_labels(vec![label])
273                .with_notes(vec![note]);
274            diags.push(diag)
275        }
276    }
277
278    fn add_invalid_link_diagnostics(
279        &self,
280        diags: &mut Vec<Diagnostic<FileId>>,
281    ) {
282        for broken_link in &self.invalid_links {
283            let link = &broken_link.link;
284            let msg = most_specific_error_message(&broken_link);
285            let diag = Diagnostic::error()
286                .with_message(msg.clone())
287                .with_labels(vec![
288                    Label::primary(link.file, link.span).with_message(msg)
289                ]);
290            diags.push(diag);
291        }
292    }
293
294    /// As shown in https://github.com/Michael-F-Bryan/mdbook-linkcheck/issues/33
295    /// absolute links are actually a bit of a foot gun when the document is
296    /// being read directly from the filesystem.
297    fn warn_on_absolute_links(
298        &self,
299        warning_policy: WarningPolicy,
300        diags: &mut Vec<Diagnostic<FileId>>,
301        files: &Files<String>,
302    ) {
303        const WARNING_MESSAGE: &'static str = r#"When viewing a document directly from the file system and click on an
304absolute link (e.g. `/index.md`), the browser will try to navigate to
305`/index.md` on the current file system (i.e. the `index.md` file inside
306`/` or `C:\`) instead of the `index.md` file at book's base directory as
307intended.
308
309This warning helps avoid the situation where everything will seem to work
310fine when viewed using a web server (e.g. GitHub Pages or `mdbook serve`),
311but users viewing the book from the file system may encounter broken links.
312
313To ignore this warning, you can edit `book.toml` and set the warning policy to
314"ignore".
315
316    [output.linkcheck]
317    warning-policy = "ignore"
318
319For more details, see https://github.com/Michael-F-Bryan/mdbook-linkcheck/issues/33
320"#;
321        let severity = match warning_policy {
322            WarningPolicy::Error => Severity::Error,
323            WarningPolicy::Warn => Severity::Warning,
324            WarningPolicy::Ignore => return,
325        };
326
327        let absolute_links = self
328            .valid_links
329            .iter()
330            .filter(|link| link.href.starts_with("/"));
331
332        let mut reasoning_emitted = false;
333
334        for link in absolute_links {
335            let mut notes = Vec::new();
336
337            if !reasoning_emitted {
338                notes.push(String::from(WARNING_MESSAGE));
339                reasoning_emitted = true;
340            }
341
342            if let Some(suggested_change) =
343                relative_path_to_file(files.name(link.file), &link.href)
344            {
345                notes.push(format!(
346                    "Suggestion: change the link to \"{}\"",
347                    suggested_change
348                ));
349            }
350
351            let diag = Diagnostic::new(severity)
352                .with_message("Absolute link should be made relative")
353                .with_notes(notes)
354                .with_labels(vec![Label::primary(link.file, link.span)
355                    .with_message("Absolute link should be made relative")]);
356
357            diags.push(diag);
358        }
359    }
360}
361
362// Path diffing, copied from https://crates.io/crates/pathdiff with some tweaks
363fn relative_path_to_file<S, D>(start: S, destination: D) -> Option<String>
364where
365    S: AsRef<Path>,
366    D: AsRef<Path>,
367{
368    let destination = destination.as_ref();
369    let start = start.as_ref();
370    log::debug!(
371        "Trying to find the relative path from \"{}\" to \"{}\"",
372        start.display(),
373        destination.display()
374    );
375
376    let start = start.parent()?;
377    let destination_name = destination.file_name()?;
378    let destination = destination.parent()?;
379
380    let mut ita = destination.components().skip(1);
381    let mut itb = start.components();
382
383    let mut comps: Vec<Component> = vec![];
384
385    loop {
386        match (ita.next(), itb.next()) {
387            (None, None) => break,
388            (Some(a), None) => {
389                comps.push(a);
390                comps.extend(ita.by_ref());
391                break;
392            },
393            (None, _) => comps.push(Component::ParentDir),
394            (Some(a), Some(b)) if comps.is_empty() && a == b => (),
395            (Some(a), Some(b)) if b == Component::CurDir => comps.push(a),
396            (Some(_), Some(b)) if b == Component::ParentDir => return None,
397            (Some(a), Some(_)) => {
398                comps.push(Component::ParentDir);
399                for _ in itb {
400                    comps.push(Component::ParentDir);
401                }
402                comps.push(a);
403                comps.extend(ita.by_ref());
404                break;
405            },
406        }
407    }
408
409    let path: PathBuf = comps
410        .iter()
411        .map(|c| c.as_os_str())
412        .chain(std::iter::once(destination_name))
413        .collect();
414
415    // Note: URLs always use forward slashes
416    Some(path.display().to_string().replace('\\', "/"))
417}
418
419fn most_specific_error_message(link: &InvalidLink) -> String {
420    if link.reason.file_not_found() {
421        return format!("File not found: {}", link.link.href);
422    }
423
424    match link.reason {
425        Reason::Io(ref io) => io.to_string(),
426        Reason::Web(ref web) if web.is_status() => {
427            let status = web.status().expect(
428                "Response::error_for_status() always contains a status code",
429            );
430            let url = web
431                .url()
432                .expect("Response::error_for_status() always contains a URL");
433
434            match status.canonical_reason() {
435                Some(reason) => format!(
436                    "Server returned {} {} for {}",
437                    status.as_u16(),
438                    reason,
439                    url
440                ),
441                None => {
442                    format!("Server returned {} for {}", status.as_u16(), url)
443                },
444            }
445        },
446        Reason::Web(ref web) => web.to_string(),
447        // fall back to the Reason's Display impl
448        _ => link.reason.to_string(),
449    }
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute hrefs should be rewritten relative to the linking file.
    #[test]
    fn check_some_simple_relative_paths() {
        // (file containing the link, absolute href, expected suggestion)
        let cases = [
            ("index.md", "/other.md", "other.md"),
            ("index.md", "/nested/other.md", "nested/other.md"),
            ("nested/index.md", "/other.md", "../other.md"),
        ];

        for &(start, destination, should_be) in cases.iter() {
            let got = relative_path_to_file(start, destination);
            assert_eq!(got.as_deref(), Some(should_be));
        }
    }
}