Skip to main content

mdbook_linkcheck2/
validate.rs

1use crate::{Config, Context, IncompleteLink, WarningPolicy};
2use anyhow::Error;
3use codespan::{FileId, Files};
4use codespan_reporting::diagnostic::{Diagnostic, Label, Severity};
5use linkcheck2::{
6    Category, Link,
7    validation::{Cache, InvalidLink, Options, Outcomes, Reason},
8};
9use std::{
10    collections::{HashMap, HashSet},
11    ffi::{OsStr, OsString},
12    fmt::{self, Display, Formatter},
13    path::{Component, Path, PathBuf},
14    sync::Mutex,
15};
16use tokio::runtime::Builder;
17
18fn lc_validate(
19    links: &[Link],
20    cfg: &Config,
21    src_dir: &Path,
22    cache: &mut Cache,
23    files: &Files<String>,
24    web_check_files_ids: &[FileId],
25    all_files_ids: &[FileId],
26) -> Outcomes {
27    let src_dir = src_dir.to_path_buf();
28    let web_check_files_ids: HashSet<_> = web_check_files_ids.iter().collect();
29    let file_names = all_files_ids
30        .iter()
31        .map(|id| files.name(*id).to_os_string())
32        .collect();
33
34    let options = Options::default()
35        .with_root_directory(&src_dir)
36        .expect("The source directory doesn't exist?")
37        .set_alternate_extensions(vec![("html".to_string(), vec!["md".to_string()])])
38        .set_links_may_traverse_the_root_directory(cfg.traverse_parent_directories)
39        // take into account the `index` preprocessor which rewrites `README.md`
40        // to `index.md` (which tne gets rendered as `index.html`)
41        .set_default_file("README.md")
42        .set_custom_validation(ensure_included_in_book(src_dir.clone(), file_names));
43
44    let interpolated_headers = cfg.interpolate_headers(cfg.warning_policy);
45
46    let ctx = Context {
47        client: cfg.client(),
48        filesystem_options: options,
49        cfg,
50        cache: Mutex::new(cache.clone()),
51        interpolated_headers,
52    };
53    let links = collate_links(links, &src_dir, files);
54
55    let runtime = Builder::new_multi_thread().enable_all().build().unwrap();
56    let got = runtime.block_on(async {
57        let mut outcomes = Outcomes::default();
58
59        for (current_dir, mut links) in links {
60            // Skip web links for files not included in filter selection
61            links.retain(|link| match link.category() {
62                Some(Category::Url(_)) => web_check_files_ids.contains(&link.file),
63                _ => true,
64            });
65
66            outcomes.merge(linkcheck2::validate(&current_dir, links, &ctx).await);
67        }
68
69        outcomes
70    });
71
72    // move the cache out of ctx. We'd get a borrowing error if anything was
73    // using it
74    let updated_cache = ctx.cache;
75
76    *cache = updated_cache
77        .into_inner()
78        .expect("We statically know this isn't used");
79    got
80}
81
82fn ensure_included_in_book(
83    src_dir: PathBuf,
84    file_names: Vec<OsString>,
85) -> impl Fn(&Path, Option<&str>) -> Result<(), Reason> + 'static {
86    move |resolved_link, _| {
87        let resolved_link = match resolved_link.strip_prefix(&src_dir) {
88            Ok(path) => path,
89            // Not part of the book.
90            Err(_) => return Ok(()),
91        };
92        let was_included_in_summary = file_names.iter().any(|summary_path| {
93            let summary_path = Path::new(summary_path);
94            if summary_path.parent() != resolved_link.parent() {
95                return false;
96            }
97            match (summary_path.file_name(), resolved_link.file_name()) {
98                (a, b) if a == b => true,
99                (Some(summary), Some(resolved)) => {
100                    // index preprocessor rewrites summary paths before we get to them.
101                    summary == Path::new("index.md") && resolved == Path::new("README.md")
102                }
103                _ => false,
104            }
105        });
106
107        let ext = resolved_link.extension();
108        let is_markdown = ext == Some(OsStr::new("md"));
109
110        if was_included_in_summary || !is_markdown {
111            Ok(())
112        } else {
113            use std::io::Error;
114
115            Err(Reason::Io(Error::other(NotInSummary {
116                path: resolved_link.to_path_buf(),
117            })))
118        }
119    }
120}
121
/// The error reported when a link points at a file that exists on disk but
/// was not included in the book.
#[derive(Debug)]
pub struct NotInSummary {
    /// Path of the file that was linked to.
    ///
    /// When this error is created by this module's validation callback, the
    /// path is relative to the book's source directory.
    pub path: PathBuf,
}

impl Display for NotInSummary {
    /// Render a user-facing message that names the offending path.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self { path } = self;
        let shown = path.display();

        write!(f, "It looks like \"{}\" wasn't included in SUMMARY.md", shown)
    }
}

impl std::error::Error for NotInSummary {}
141
142fn collate_links<'a>(
143    links: &'a [Link],
144    src_dir: &Path,
145    files: &'a Files<String>,
146) -> impl Iterator<Item = (PathBuf, Vec<linkcheck2::Link>)> {
147    let mut links_by_directory: HashMap<PathBuf, Vec<linkcheck2::Link>> = HashMap::new();
148
149    for link in links {
150        let mut path = src_dir.join(files.name(link.file));
151        path.pop();
152        links_by_directory
153            .entry(path)
154            .or_default()
155            .push(link.clone());
156    }
157
158    links_by_directory.into_iter()
159}
160
161fn merge_outcomes(outcomes: Outcomes, incomplete_links: Vec<IncompleteLink>) -> ValidationOutcome {
162    // Note: we want to sort all outcomes by file and then its location in that
163    // file.
164    //
165    // That way, when we emit diagnostics they'll be emitted for each file in
166    // the order that it is listed in `SUMMARY.md`, then individual diagnostics
167    // will be emitted from the start of each file to the end.
168    fn sorted<T, F>(mut items: Vec<T>, mut key: F) -> Vec<T>
169    where
170        F: FnMut(&T) -> &Link,
171    {
172        items.sort_by_key(|item| {
173            let link = key(item);
174            (link.file, link.span)
175        });
176        items
177    }
178
179    fn sorted_link(items: Vec<Link>) -> Vec<Link> {
180        sorted(items, |link| link)
181    }
182
183    ValidationOutcome {
184        invalid_links: sorted(outcomes.invalid, |l| &l.link),
185        ignored: sorted_link(outcomes.ignored),
186        valid_links: sorted_link(outcomes.valid),
187        unknown_category: sorted_link(outcomes.unknown_category),
188        incomplete_links,
189    }
190}
191
192#[allow(clippy::too_many_arguments)]
193/// Try to validate the provided [`Link`]s.
194pub fn validate(
195    links: &[Link],
196    cfg: &Config,
197    src_dir: &Path,
198    cache: &mut Cache,
199    files: &Files<String>,
200    web_check_files_ids: &[FileId],
201    all_files_ids: &[FileId],
202    incomplete_links: Vec<IncompleteLink>,
203) -> Result<ValidationOutcome, Error> {
204    let got = lc_validate(
205        links,
206        cfg,
207        src_dir,
208        cache,
209        files,
210        web_check_files_ids,
211        all_files_ids,
212    );
213    Ok(merge_outcomes(got, incomplete_links))
214}
215
/// The outcome of validating a set of links.
///
/// When this value is produced by [`validate`], every list except
/// `incomplete_links` is sorted by file and then by the link's position in
/// that file; `incomplete_links` keeps the order it was supplied in.
#[derive(Debug, Default)]
pub struct ValidationOutcome {
    /// Valid links.
    pub valid_links: Vec<Link>,
    /// Links where validation failed, each paired with the reason it failed.
    pub invalid_links: Vec<InvalidLink>,
    /// Links which have been ignored (e.g. due to
    /// [`Config::follow_web_links`]).
    pub ignored: Vec<Link>,
    /// Links which we don't know how to handle.
    pub unknown_category: Vec<Link>,
    /// Potentially incomplete links, i.e. references for which no URL
    /// definition was found.
    pub incomplete_links: Vec<IncompleteLink>,
}
231
232impl ValidationOutcome {
233    /// Generate a list of [`Diagnostic`] messages from this
234    /// [`ValidationOutcome`].
235    pub fn generate_diagnostics(
236        &self,
237        files: &Files<String>,
238        warning_policy: WarningPolicy,
239    ) -> Vec<Diagnostic<FileId>> {
240        let mut diags = Vec::new();
241
242        self.add_invalid_link_diagnostics(&mut diags);
243        self.add_incomplete_link_diagnostics(warning_policy, &mut diags);
244        self.warn_on_absolute_links(warning_policy, &mut diags, files);
245
246        diags
247    }
248
249    fn add_incomplete_link_diagnostics(
250        &self,
251        warning_policy: WarningPolicy,
252        diags: &mut Vec<Diagnostic<FileId>>,
253    ) {
254        let severity = match warning_policy {
255            WarningPolicy::Error => Severity::Error,
256            WarningPolicy::Warn => Severity::Warning,
257            WarningPolicy::Ignore => return,
258        };
259
260        for incomplete in &self.incomplete_links {
261            let IncompleteLink {
262                reference,
263                file,
264                span,
265            } = incomplete;
266
267            let msg = format!("Did you forget to define a URL for `{0}`?", reference);
268            let label = Label::primary(*file, *span).with_message(msg);
269            let note = format!(
270                "hint: declare the link's URL. For example: `[{}]: http://example.com/`",
271                reference
272            );
273
274            let diag = Diagnostic::new(severity)
275                .with_message("Potential incomplete link")
276                .with_labels(vec![label])
277                .with_notes(vec![note]);
278            diags.push(diag)
279        }
280    }
281
282    fn add_invalid_link_diagnostics(&self, diags: &mut Vec<Diagnostic<FileId>>) {
283        for broken_link in &self.invalid_links {
284            let link = &broken_link.link;
285            let msg = most_specific_error_message(broken_link);
286            let diag = Diagnostic::error()
287                .with_message(msg.clone())
288                .with_labels(vec![Label::primary(link.file, link.span).with_message(msg)]);
289            diags.push(diag);
290        }
291    }
292
293    /// As shown in https://github.com/Michael-F-Bryan/mdbook-linkcheck/issues/33
294    /// absolute links are actually a bit of a foot gun when the document is
295    /// being read directly from the filesystem.
296    fn warn_on_absolute_links(
297        &self,
298        warning_policy: WarningPolicy,
299        diags: &mut Vec<Diagnostic<FileId>>,
300        files: &Files<String>,
301    ) {
302        const WARNING_MESSAGE: &str = r#"When viewing a document directly from the file system and click on an
303absolute link (e.g. `/index.md`), the browser will try to navigate to
304`/index.md` on the current file system (i.e. the `index.md` file inside
305`/` or `C:\`) instead of the `index.md` file at book's base directory as
306intended.
307
308This warning helps avoid the situation where everything will seem to work
309fine when viewed using a web server (e.g. GitHub Pages or `mdbook serve`),
310but users viewing the book from the file system may encounter broken links.
311
312To ignore this warning, you can edit `book.toml` and set the warning policy to
313"ignore".
314
315    [output.linkcheck2]
316    warning-policy = "ignore"
317
318For more details, see https://github.com/Michael-F-Bryan/mdbook-linkcheck/issues/33
319"#;
320        let severity = match warning_policy {
321            WarningPolicy::Error => Severity::Error,
322            WarningPolicy::Warn => Severity::Warning,
323            WarningPolicy::Ignore => return,
324        };
325
326        let absolute_links = self
327            .valid_links
328            .iter()
329            .filter(|link| link.href.starts_with("/"));
330
331        let mut reasoning_emitted = false;
332
333        for link in absolute_links {
334            let mut notes = Vec::new();
335
336            if !reasoning_emitted {
337                notes.push(String::from(WARNING_MESSAGE));
338                reasoning_emitted = true;
339            }
340
341            if let Some(suggested_change) = relative_path_to_file(files.name(link.file), &link.href)
342            {
343                notes.push(format!(
344                    "Suggestion: change the link to \"{}\"",
345                    suggested_change
346                ));
347            }
348
349            let diag = Diagnostic::new(severity)
350                .with_message("Absolute link should be made relative")
351                .with_notes(notes)
352                .with_labels(vec![
353                    Label::primary(link.file, link.span)
354                        .with_message("Absolute link should be made relative"),
355                ]);
356
357            diags.push(diag);
358        }
359    }
360}
361
362// Path diffing, copied from https://crates.io/crates/pathdiff with some tweaks
363fn relative_path_to_file<S, D>(start: S, destination: D) -> Option<String>
364where
365    S: AsRef<Path>,
366    D: AsRef<Path>,
367{
368    let destination = destination.as_ref();
369    let start = start.as_ref();
370    log::debug!(
371        "Trying to find the relative path from \"{}\" to \"{}\"",
372        start.display(),
373        destination.display()
374    );
375
376    let start = start.parent()?;
377    let destination_name = destination.file_name()?;
378    let destination = destination.parent()?;
379
380    let mut ita = destination.components().skip(1);
381    let mut itb = start.components();
382
383    let mut comps: Vec<Component> = vec![];
384
385    loop {
386        match (ita.next(), itb.next()) {
387            (None, None) => break,
388            (Some(a), None) => {
389                comps.push(a);
390                comps.extend(ita.by_ref());
391                break;
392            }
393            (None, _) => comps.push(Component::ParentDir),
394            (Some(a), Some(b)) if comps.is_empty() && a == b => (),
395            (Some(a), Some(Component::CurDir)) => comps.push(a),
396            (Some(_), Some(Component::ParentDir)) => return None,
397            (Some(a), Some(_)) => {
398                comps.push(Component::ParentDir);
399                for _ in itb {
400                    comps.push(Component::ParentDir);
401                }
402                comps.push(a);
403                comps.extend(ita.by_ref());
404                break;
405            }
406        }
407    }
408
409    let path: PathBuf = comps
410        .iter()
411        .map(|c| c.as_os_str())
412        .chain(std::iter::once(destination_name))
413        .collect();
414
415    // Note: URLs always use forward slashes
416    Some(path.display().to_string().replace('\\', "/"))
417}
418
419fn most_specific_error_message(link: &InvalidLink) -> String {
420    if link.reason.file_not_found() {
421        return format!("File not found: {}", link.link.href);
422    }
423
424    match link.reason {
425        Reason::Io(ref io) => io.to_string(),
426        Reason::Web(ref web) if web.is_status() => {
427            let status = web
428                .status()
429                .expect("Response::error_for_status() always contains a status code");
430            let url = web
431                .url()
432                .expect("Response::error_for_status() always contains a URL");
433
434            match status.canonical_reason() {
435                Some(reason) => {
436                    format!("Server returned {} {} for {}", status.as_u16(), reason, url)
437                }
438                None => {
439                    format!("Server returned {} for {}", status.as_u16(), url)
440                }
441            }
442        }
443        Reason::Web(ref web) => web.to_string(),
444        // fall back to the Reason's Display impl
445        _ => link.reason.to_string(),
446    }
447}
448
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute links are rewritten relative to the directory of the file
    /// that contains them.
    #[test]
    fn check_some_simple_relative_paths() {
        // (file containing the link, absolute link, expected relative link)
        let cases = [
            ("index.md", "/other.md", "other.md"),
            ("index.md", "/nested/other.md", "nested/other.md"),
            ("nested/index.md", "/other.md", "../other.md"),
        ];

        for &(start, destination, should_be) in cases.iter() {
            let got = relative_path_to_file(start, destination).unwrap();
            assert_eq!(got, should_be);
        }
    }
}
466}