1use std::{
3 fmt,
4 path::{Path, PathBuf},
5};
6
7use log::info;
8use rayon::prelude::*;
9use rayon::ThreadPoolBuilder;
10use url::Url;
11use walkdir::{DirEntry, WalkDir};
12
13use check::is_available;
14
15pub use check::{CheckError, IoError};
16
17mod check;
18mod parse;
19
/// Policy for links that point at HTTP(S) resources.
///
/// The policy is carried in [`CheckContext::check_http`]; how each variant is
/// acted upon is decided by the `check` module.
// NOTE(review): the per-variant notes below are inferred from the variant
// names only -- confirm against the `check` module.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum HttpCheck {
    /// HTTP checking is switched on (presumably the link is requested).
    Enabled,
    /// HTTP links are ignored (presumably neither fetched nor reported).
    Ignored,
    /// HTTP links are forbidden (presumably their presence is an error).
    Forbidden,
}
33
34#[derive(Clone, Debug)]
36pub struct CheckContext {
37 pub verbose: bool,
38 pub check_http: HttpCheck,
39 pub check_fragments: bool,
40 pub check_intra_doc_links: bool,
41}
42
43impl Default for CheckContext {
44 fn default() -> Self {
45 CheckContext {
46 check_http: HttpCheck::Ignored,
47 verbose: false,
48 check_fragments: true,
49 check_intra_doc_links: false,
50 }
51 }
52}
53
54#[derive(Debug)]
55pub struct FileError {
56 pub path: PathBuf,
57 pub errors: Vec<CheckError>,
58}
59
60impl fmt::Display for FileError {
61 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62 write!(f, "Found invalid urls in {}:", self.path.display())?;
63 for e in &self.errors {
64 write!(f, "\n\t{}", e)?;
65 }
66 Ok(())
67 }
68}
69
70pub fn walk_dir(dir_path: &Path, ctx: &CheckContext) -> bool {
75 let pool = ThreadPoolBuilder::new()
76 .num_threads(num_cpus::get())
77 .build()
78 .unwrap();
79
80 pool.install(|| {
81 unavailable_urls(dir_path, ctx)
82 .map(|mut err| {
83 if !ctx.verbose {
84 err.shorten_all(dir_path);
85 }
86 println!("{}", err);
87 true
88 })
89 .reduce(|| false, |initial, new| initial || new)
91 })
92}
93
94impl FileError {
95 fn shorten_all(&mut self, prefix: &Path) {
96 use check::Link;
97
98 if let Ok(shortened) = self.path.strip_prefix(&prefix) {
99 self.path = shortened.to_path_buf();
100 };
101 for mut e in &mut self.errors {
102 if let CheckError::File(epath) | CheckError::Fragment(Link::File(epath), _, _) = &mut e
103 {
104 if let Ok(shortened) = epath.strip_prefix(prefix) {
105 *epath = shortened.to_path_buf();
106 }
107 }
108 }
109 }
110}
111
112fn is_html_file(entry: &DirEntry) -> bool {
113 match entry.path().extension() {
114 Some(e) => e.to_str().map(|ext| ext == "html").unwrap_or(false),
115 None => false,
116 }
117}
118
119pub fn unavailable_urls<'a>(
120 dir_path: &'a Path,
121 ctx: &'a CheckContext,
122) -> impl ParallelIterator<Item = FileError> + 'a {
123 let root_url = Url::from_directory_path(dir_path).unwrap();
124
125 WalkDir::new(dir_path)
126 .into_iter()
127 .par_bridge()
128 .filter_map(Result::ok)
129 .filter(|entry| entry.file_type().is_file() && is_html_file(entry))
130 .flat_map(move |entry| {
131 let path = entry.path();
132 info!("Checking doc page at {}", path.display());
133 let html = std::fs::read_to_string(path)
134 .unwrap_or_else(|e| panic!("{} did not contain valid UTF8: {}", path.display(), e));
135
136 let file_url = Url::from_file_path(path).unwrap();
137 let urls = parse::parse_a_hrefs(&html, &root_url, &file_url);
138 let broken_intra_doc_links = if ctx.check_intra_doc_links {
139 parse::broken_intra_doc_links(&html)
140 } else {
141 Vec::new()
142 };
143 let errors = urls
144 .into_iter()
145 .filter_map(|url| is_available(&url, ctx).err())
146 .chain(broken_intra_doc_links)
147 .collect::<Vec<_>>();
148
149 if errors.is_empty() {
150 None
151 } else {
152 let path = entry.path().to_owned();
153 Some(FileError { path, errors })
154 }
155 })
156}