mdbook_linkcheck2/
validate.rs

1use crate::{Config, Context, IncompleteLink, WarningPolicy};
2use anyhow::Error;
3use codespan::{FileId, Files};
4use codespan_reporting::diagnostic::{Diagnostic, Label, Severity};
5use linkcheck2::{
6    validation::{Cache, InvalidLink, Options, Outcomes, Reason},
7    Category, Link,
8};
9use std::{
10    collections::{HashMap, HashSet},
11    ffi::{OsStr, OsString},
12    fmt::{self, Display, Formatter},
13    path::{Component, Path, PathBuf},
14    sync::Mutex,
15};
16use tokio::runtime::Builder;
17
18fn lc_validate(
19    links: &[Link],
20    cfg: &Config,
21    src_dir: &Path,
22    cache: &mut Cache,
23    files: &Files<String>,
24    web_check_files_ids: &[FileId],
25    all_files_ids: &[FileId],
26) -> Outcomes {
27    let web_check_files_ids: HashSet<_> = web_check_files_ids.iter().collect();
28    let file_names = all_files_ids
29        .iter()
30        .map(|id| files.name(*id).to_os_string())
31        .collect();
32
33    let options = Options::default()
34        .with_root_directory(src_dir)
35        .expect("The source directory doesn't exist?")
36        .set_alternate_extensions(vec![("html".to_string(), vec!["md".to_string()])])
37        .set_links_may_traverse_the_root_directory(cfg.traverse_parent_directories)
38        // take into account the `index` preprocessor which rewrites `README.md`
39        // to `index.md` (which tne gets rendered as `index.html`)
40        .set_default_file("README.md")
41        .set_custom_validation(ensure_included_in_book(src_dir, file_names));
42
43    let interpolated_headers = cfg.interpolate_headers(cfg.warning_policy);
44
45    let ctx = Context {
46        client: cfg.client(),
47        filesystem_options: options,
48        cfg,
49        cache: Mutex::new(cache.clone()),
50        interpolated_headers,
51    };
52    let links = collate_links(links, src_dir, files);
53
54    let runtime = Builder::new_multi_thread().enable_all().build().unwrap();
55    let got = runtime.block_on(async {
56        let mut outcomes = Outcomes::default();
57
58        for (current_dir, mut links) in links {
59            // Skip web links for files not included in filter selection
60            links.retain(|link| match link.category() {
61                Some(Category::Url(_)) => web_check_files_ids.contains(&link.file),
62                _ => true,
63            });
64
65            outcomes.merge(linkcheck2::validate(&current_dir, links, &ctx).await);
66        }
67
68        outcomes
69    });
70
71    // move the cache out of ctx. We'd get a borrowing error if anything was
72    // using it
73    let updated_cache = ctx.cache;
74
75    *cache = updated_cache
76        .into_inner()
77        .expect("We statically know this isn't used");
78    got
79}
80
81fn ensure_included_in_book(
82    src_dir: &Path,
83    file_names: Vec<OsString>,
84) -> impl Fn(&Path, Option<&str>) -> Result<(), Reason> {
85    let src_dir = src_dir.to_path_buf();
86
87    move |resolved_link, _| {
88        let resolved_link = match resolved_link.strip_prefix(&src_dir) {
89            Ok(path) => path,
90            // Not part of the book.
91            Err(_) => return Ok(()),
92        };
93        let was_included_in_summary = file_names.iter().any(|summary_path| {
94            let summary_path = Path::new(summary_path);
95            if summary_path.parent() != resolved_link.parent() {
96                return false;
97            }
98            match (summary_path.file_name(), resolved_link.file_name()) {
99                (a, b) if a == b => true,
100                (Some(summary), Some(resolved)) => {
101                    // index preprocessor rewrites summary paths before we get to them.
102                    summary == Path::new("index.md") && resolved == Path::new("README.md")
103                }
104                _ => false,
105            }
106        });
107
108        let ext = resolved_link.extension();
109        let is_markdown = ext == Some(OsStr::new("md"));
110
111        if was_included_in_summary || !is_markdown {
112            Ok(())
113        } else {
114            use std::io::{Error, ErrorKind};
115
116            Err(Reason::Io(Error::new(
117                ErrorKind::Other,
118                NotInSummary {
119                    path: resolved_link.to_path_buf(),
120                },
121            )))
122        }
123    }
124}
125
/// The error reported when a link points at a file that is not part of the
/// book.
#[derive(Debug)]
pub struct NotInSummary {
    /// Path of the file that was linked to.
    pub path: PathBuf,
}

impl Display for NotInSummary {
    /// Writes a one-line message naming the offending path.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let shown = self.path.display();
        f.write_fmt(format_args!(
            "It looks like \"{}\" wasn't included in SUMMARY.md",
            shown
        ))
    }
}

impl std::error::Error for NotInSummary {}
145
146fn collate_links<'a>(
147    links: &'a [Link],
148    src_dir: &Path,
149    files: &'a Files<String>,
150) -> impl Iterator<Item = (PathBuf, Vec<linkcheck2::Link>)> {
151    let mut links_by_directory: HashMap<PathBuf, Vec<linkcheck2::Link>> = HashMap::new();
152
153    for link in links {
154        let mut path = src_dir.join(files.name(link.file));
155        path.pop();
156        links_by_directory
157            .entry(path)
158            .or_default()
159            .push(link.clone());
160    }
161
162    links_by_directory.into_iter()
163}
164
165fn merge_outcomes(outcomes: Outcomes, incomplete_links: Vec<IncompleteLink>) -> ValidationOutcome {
166    // Note: we want to sort all outcomes by file and then its location in that
167    // file.
168    //
169    // That way, when we emit diagnostics they'll be emitted for each file in
170    // the order that it is listed in `SUMMARY.md`, then individual diagnostics
171    // will be emitted from the start of each file to the end.
172    fn sorted<T, F>(mut items: Vec<T>, mut key: F) -> Vec<T>
173    where
174        F: FnMut(&T) -> &Link,
175    {
176        items.sort_by_key(|item| {
177            let link = key(item);
178            (link.file, link.span)
179        });
180        items
181    }
182
183    fn sorted_link(items: Vec<Link>) -> Vec<Link> {
184        sorted(items, |link| link)
185    }
186
187    ValidationOutcome {
188        invalid_links: sorted(outcomes.invalid, |l| &l.link),
189        ignored: sorted_link(outcomes.ignored),
190        valid_links: sorted_link(outcomes.valid),
191        unknown_category: sorted_link(outcomes.unknown_category),
192        incomplete_links,
193    }
194}
195
196#[allow(clippy::too_many_arguments)]
197/// Try to validate the provided [`Link`]s.
198pub fn validate(
199    links: &[Link],
200    cfg: &Config,
201    src_dir: &Path,
202    cache: &mut Cache,
203    files: &Files<String>,
204    web_check_files_ids: &[FileId],
205    all_files_ids: &[FileId],
206    incomplete_links: Vec<IncompleteLink>,
207) -> Result<ValidationOutcome, Error> {
208    let got = lc_validate(
209        links,
210        cfg,
211        src_dir,
212        cache,
213        files,
214        web_check_files_ids,
215        all_files_ids,
216    );
217    Ok(merge_outcomes(got, incomplete_links))
218}
219
220/// The outcome of validating a set of links.
221#[derive(Debug, Default)]
222pub struct ValidationOutcome {
223    /// Valid links.
224    pub valid_links: Vec<Link>,
225    /// Links where validation failed.
226    pub invalid_links: Vec<InvalidLink>,
227    /// Links which have been ignored (e.g. due to
228    /// [`Config::follow_web_links`]).
229    pub ignored: Vec<Link>,
230    /// Links which we don't know how to handle.
231    pub unknown_category: Vec<Link>,
232    /// Potentially incomplete links.
233    pub incomplete_links: Vec<IncompleteLink>,
234}
235
236impl ValidationOutcome {
237    /// Generate a list of [`Diagnostic`] messages from this
238    /// [`ValidationOutcome`].
239    pub fn generate_diagnostics(
240        &self,
241        files: &Files<String>,
242        warning_policy: WarningPolicy,
243    ) -> Vec<Diagnostic<FileId>> {
244        let mut diags = Vec::new();
245
246        self.add_invalid_link_diagnostics(&mut diags);
247        self.add_incomplete_link_diagnostics(warning_policy, &mut diags);
248        self.warn_on_absolute_links(warning_policy, &mut diags, files);
249
250        diags
251    }
252
253    fn add_incomplete_link_diagnostics(
254        &self,
255        warning_policy: WarningPolicy,
256        diags: &mut Vec<Diagnostic<FileId>>,
257    ) {
258        let severity = match warning_policy {
259            WarningPolicy::Error => Severity::Error,
260            WarningPolicy::Warn => Severity::Warning,
261            WarningPolicy::Ignore => return,
262        };
263
264        for incomplete in &self.incomplete_links {
265            let IncompleteLink {
266                ref reference,
267                file,
268                span,
269            } = incomplete;
270
271            let msg = format!("Did you forget to define a URL for `{0}`?", reference);
272            let label = Label::primary(*file, *span).with_message(msg);
273            let note = format!(
274                "hint: declare the link's URL. For example: `[{}]: http://example.com/`",
275                reference
276            );
277
278            let diag = Diagnostic::new(severity)
279                .with_message("Potential incomplete link")
280                .with_labels(vec![label])
281                .with_notes(vec![note]);
282            diags.push(diag)
283        }
284    }
285
286    fn add_invalid_link_diagnostics(&self, diags: &mut Vec<Diagnostic<FileId>>) {
287        for broken_link in &self.invalid_links {
288            let link = &broken_link.link;
289            let msg = most_specific_error_message(broken_link);
290            let diag = Diagnostic::error()
291                .with_message(msg.clone())
292                .with_labels(vec![Label::primary(link.file, link.span).with_message(msg)]);
293            diags.push(diag);
294        }
295    }
296
297    /// As shown in https://github.com/Michael-F-Bryan/mdbook-linkcheck/issues/33
298    /// absolute links are actually a bit of a foot gun when the document is
299    /// being read directly from the filesystem.
300    fn warn_on_absolute_links(
301        &self,
302        warning_policy: WarningPolicy,
303        diags: &mut Vec<Diagnostic<FileId>>,
304        files: &Files<String>,
305    ) {
306        const WARNING_MESSAGE: &str = r#"When viewing a document directly from the file system and click on an
307absolute link (e.g. `/index.md`), the browser will try to navigate to
308`/index.md` on the current file system (i.e. the `index.md` file inside
309`/` or `C:\`) instead of the `index.md` file at book's base directory as
310intended.
311
312This warning helps avoid the situation where everything will seem to work
313fine when viewed using a web server (e.g. GitHub Pages or `mdbook serve`),
314but users viewing the book from the file system may encounter broken links.
315
316To ignore this warning, you can edit `book.toml` and set the warning policy to
317"ignore".
318
319    [output.linkcheck2]
320    warning-policy = "ignore"
321
322For more details, see https://github.com/Michael-F-Bryan/mdbook-linkcheck/issues/33
323"#;
324        let severity = match warning_policy {
325            WarningPolicy::Error => Severity::Error,
326            WarningPolicy::Warn => Severity::Warning,
327            WarningPolicy::Ignore => return,
328        };
329
330        let absolute_links = self
331            .valid_links
332            .iter()
333            .filter(|link| link.href.starts_with("/"));
334
335        let mut reasoning_emitted = false;
336
337        for link in absolute_links {
338            let mut notes = Vec::new();
339
340            if !reasoning_emitted {
341                notes.push(String::from(WARNING_MESSAGE));
342                reasoning_emitted = true;
343            }
344
345            if let Some(suggested_change) = relative_path_to_file(files.name(link.file), &link.href)
346            {
347                notes.push(format!(
348                    "Suggestion: change the link to \"{}\"",
349                    suggested_change
350                ));
351            }
352
353            let diag = Diagnostic::new(severity)
354                .with_message("Absolute link should be made relative")
355                .with_notes(notes)
356                .with_labels(vec![Label::primary(link.file, link.span)
357                    .with_message("Absolute link should be made relative")]);
358
359            diags.push(diag);
360        }
361    }
362}
363
364// Path diffing, copied from https://crates.io/crates/pathdiff with some tweaks
365fn relative_path_to_file<S, D>(start: S, destination: D) -> Option<String>
366where
367    S: AsRef<Path>,
368    D: AsRef<Path>,
369{
370    let destination = destination.as_ref();
371    let start = start.as_ref();
372    log::debug!(
373        "Trying to find the relative path from \"{}\" to \"{}\"",
374        start.display(),
375        destination.display()
376    );
377
378    let start = start.parent()?;
379    let destination_name = destination.file_name()?;
380    let destination = destination.parent()?;
381
382    let mut ita = destination.components().skip(1);
383    let mut itb = start.components();
384
385    let mut comps: Vec<Component> = vec![];
386
387    loop {
388        match (ita.next(), itb.next()) {
389            (None, None) => break,
390            (Some(a), None) => {
391                comps.push(a);
392                comps.extend(ita.by_ref());
393                break;
394            }
395            (None, _) => comps.push(Component::ParentDir),
396            (Some(a), Some(b)) if comps.is_empty() && a == b => (),
397            (Some(a), Some(Component::CurDir)) => comps.push(a),
398            (Some(_), Some(Component::ParentDir)) => return None,
399            (Some(a), Some(_)) => {
400                comps.push(Component::ParentDir);
401                for _ in itb {
402                    comps.push(Component::ParentDir);
403                }
404                comps.push(a);
405                comps.extend(ita.by_ref());
406                break;
407            }
408        }
409    }
410
411    let path: PathBuf = comps
412        .iter()
413        .map(|c| c.as_os_str())
414        .chain(std::iter::once(destination_name))
415        .collect();
416
417    // Note: URLs always use forward slashes
418    Some(path.display().to_string().replace('\\', "/"))
419}
420
421fn most_specific_error_message(link: &InvalidLink) -> String {
422    if link.reason.file_not_found() {
423        return format!("File not found: {}", link.link.href);
424    }
425
426    match link.reason {
427        Reason::Io(ref io) => io.to_string(),
428        Reason::Web(ref web) if web.is_status() => {
429            let status = web
430                .status()
431                .expect("Response::error_for_status() always contains a status code");
432            let url = web
433                .url()
434                .expect("Response::error_for_status() always contains a URL");
435
436            match status.canonical_reason() {
437                Some(reason) => {
438                    format!("Server returned {} {} for {}", status.as_u16(), reason, url)
439                }
440                None => {
441                    format!("Server returned {} for {}", status.as_u16(), url)
442                }
443            }
444        }
445        Reason::Web(ref web) => web.to_string(),
446        // fall back to the Reason's Display impl
447        _ => link.reason.to_string(),
448    }
449}
450
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute destinations are rewritten relative to the linking file.
    #[test]
    fn check_some_simple_relative_paths() {
        // (file containing the link, absolute link, expected relative link)
        let cases = [
            ("index.md", "/other.md", "other.md"),
            ("index.md", "/nested/other.md", "nested/other.md"),
            ("nested/index.md", "/other.md", "../other.md"),
        ];

        for &(start, destination, expected) in cases.iter() {
            let actual = relative_path_to_file(start, destination).unwrap();
            assert_eq!(actual, expected);
        }
    }
}