mdbook_linkcheck2/
links.rs

1use crate::{
2    config::Config,
3    latex::{filter_out_latex, ByteIndexMap},
4};
5use codespan::{ByteIndex, FileId, Files, Span};
6use linkcheck2::Link;
7use pulldown_cmark::{BrokenLink, CowStr};
8use std::{cell::RefCell, fmt::Debug};
9
10/// Search every file in the [`Files`] and collate all the links that are
11/// found.
12pub fn extract<I>(
13    cfg: &Config,
14    target_files: I,
15    files: &Files<String>,
16) -> (Vec<Link>, Vec<IncompleteLink>)
17where
18    I: IntoIterator<Item = FileId>,
19{
20    let mut links = Vec::new();
21    let broken_links = RefCell::new(Vec::new());
22
23    for file_id in target_files {
24        let src = files.source(file_id);
25
26        let (src, byte_index_map) = if cfg.latex_support {
27            filter_out_latex(src)
28        } else {
29            (src.clone(), ByteIndexMap::new())
30        };
31
32        log::debug!("Scanning {}", files.name(file_id).to_string_lossy());
33
34        let mapspan = |span: Span| {
35            Span::new(
36                ByteIndex(byte_index_map.resolve(span.start().to_usize() as u32)),
37                ByteIndex(byte_index_map.resolve(span.end().to_usize() as u32)),
38            )
39        };
40
41        links.extend(
42            scan_links(file_id, &src, &mut |broken_link| {
43                let BrokenLink {
44                    reference, span, ..
45                } = broken_link;
46                log::debug!(
47                    "Found a (possibly) broken link to [{}] at {:?}",
48                    reference,
49                    span
50                );
51
52                ////assert!(false, "kek panic, unreachable?");
53                // println!(
54                //    "start {:?} end {:?} res_a {:?} res_b {:?}",
55                //    span.start,
56                //    span.end,
57                //    ByteIndex(byte_index_map.resolve(span.start as u32)),
58                //    ByteIndex(byte_index_map.resolve(span.end as u32))
59                //);
60                let origspan = Span::new(ByteIndex(span.start as u32), ByteIndex(span.end as u32));
61                let span = mapspan(origspan);
62
63                broken_links.borrow_mut().push(IncompleteLink {
64                    reference: reference.to_string(),
65                    span,
66                    file: file_id,
67                });
68                None
69            })
70            .map(|link| Link::new(link.href, mapspan(link.span), link.file)),
71        );
72    }
73
74    (links, broken_links.into_inner())
75}
76
77fn scan_links<'a, F>(
78    file_id: FileId,
79    src: &'a str,
80    cb: &'a mut F,
81) -> impl Iterator<Item = Link> + 'a
82where
83    F: FnMut(BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> + 'a,
84{
85    linkcheck2::scanners::markdown_with_broken_link_callback(src, Some(cb))
86        .map(move |(link, span)| Link::new(link, span, file_id))
87}
88
89/// A potential link that has a broken reference (e.g `[foo]` when there is no
90/// `[foo]: ...` entry at the bottom).
91#[derive(Debug, Clone, PartialEq)]
92pub struct IncompleteLink {
93    /// The reference name (e.g. the `foo` in `[foo]`).
94    pub reference: String,
95    /// Which file was the incomplete link found in?
96    pub file: FileId,
97    /// Where this incomplete link occurred in the source text.
98    pub span: Span,
99}