1use std::collections::HashSet;
3use std::fmt;
4use std::fs::read_to_string;
5use std::path::{Path, PathBuf};
6
7use log::{debug, warn};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use url::Url;
11
12use cached::cached_key_result;
13use cached::SizedCache;
14
15use super::CheckContext;
16
17use crate::{
18 parse::{parse_fragments, parse_redirect},
19 HttpCheck,
20};
21
/// URL prefixes that are never fetched over HTTP.
///
/// `check_http_url` skips (with a warning) any URL whose string form starts
/// with one of these prefixes, and it does so before the "HTTP forbidden"
/// check, so such links are accepted even when HTTP links are forbidden.
const PREFIX_BLACKLIST: [&str; 1] = ["https://doc.rust-lang.org"];
23
24#[derive(Debug)]
25pub enum IoError {
26 HttpUnexpectedStatus(ureq::Response),
27 HttpFetch(ureq::Transport),
28 FileIo(String, std::io::Error),
29}
30
31impl fmt::Display for IoError {
32 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
33 match self {
34 IoError::HttpUnexpectedStatus(resp) => write!(
35 f,
36 "Unexpected HTTP status fetching {}: {}",
37 resp.get_url(),
38 resp.status_text()
39 ),
40 IoError::HttpFetch(e) => write!(f, "Error fetching {}", e),
41 IoError::FileIo(url, e) => write!(f, "Error fetching {}: {}", url, e),
42 }
43 }
44}
45
46#[derive(Debug, Clone)]
47pub enum Link {
48 File(PathBuf),
49 Http(Url),
50}
51
52impl fmt::Display for Link {
53 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54 match self {
55 Link::File(path) => write!(f, "{}", path.display()),
56 Link::Http(url) => f.write_str(url.as_str()),
57 }
58 }
59}
60
61impl Link {
62 fn without_fragment(&self) -> Link {
64 match self {
65 Link::Http(url) => {
66 let mut url = url.clone();
67 url.set_fragment(None);
68
69 Link::Http(url)
70 }
71 _ => self.clone(),
72 }
73 }
74}
75
76#[derive(Debug)]
77pub enum CheckError {
78 IntraDocLink(String),
80 File(PathBuf),
82 Http(Url),
84 HttpForbidden(Url),
86 Fragment(Link, String, Option<Vec<String>>),
88 Io(Box<IoError>),
90}
91
92impl From<ureq::Error> for CheckError {
93 fn from(err: ureq::Error) -> Self {
94 let io_err = match err {
95 ureq::Error::Status(_, response) => IoError::HttpUnexpectedStatus(response),
96 ureq::Error::Transport(err) => IoError::HttpFetch(err),
97 };
98 CheckError::Io(Box::new(io_err))
99 }
100}
101
102impl fmt::Display for CheckError {
103 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
104 match self {
105 CheckError::IntraDocLink(text) => {
106 write!(f, "Broken intra-doc link to {}!", text)
107 }
108 CheckError::File(path) => {
109 write!(f, "Linked file at path {} does not exist!", path.display())
110 }
111 CheckError::Http(url) => write!(f, "Linked URL {} does not exist!", url),
112 CheckError::HttpForbidden(url) => write!(
113 f,
114 "Found HTTP link {}, but HTTP checking is forbidden!",
115 url
116 ),
117 CheckError::Fragment(link, fragment, missing_parts) => match missing_parts {
118 Some(missing_parts) => write!(
119 f,
120 "Fragments #{} as expected by ranged fragment #{} at {} do not exist!\n\
121 This is likely a bug in rustdoc itself.",
122 missing_parts.join(", #"),
123 fragment,
124 link
125 ),
126 None => write!(f, "Fragment #{} at {} does not exist!", fragment, link),
127 },
128 CheckError::Io(err) => err.fmt(f),
129 }
130 }
131}
132
133pub fn is_available(url: &Url, ctx: &CheckContext) -> Result<(), CheckError> {
135 match url.scheme() {
136 "file" => check_file_url(url, ctx),
137 "http" | "https" => check_http_url(url, ctx),
138 scheme @ "javascript" => {
139 debug!("Not checking URL scheme {:?}", scheme);
140 Ok(())
141 }
142 other => {
143 debug!("Unrecognized URL scheme {:?}", other);
144 Ok(())
145 }
146 }
147}
148
149cached_key_result! {
150 CHECK_FILE: SizedCache<String, HashSet<String>> = SizedCache::with_size(100);
151 Key = { link.without_fragment().to_string() };
152 fn fragments_from(
155 link: &Link,
156 fetch_html: impl Fn() -> Result<String, CheckError>
157 ) -> Result<HashSet<String>, CheckError> = {
158 fetch_html().map(|html| parse_fragments(&html))
159 }
160}
161
162fn is_fragment_available(
163 link: &Link,
164 fragment: &str,
165 fetch_html: impl Fn() -> Result<String, CheckError>,
166) -> Result<(), CheckError> {
167 if fragment.is_empty() {
170 return Ok(());
171 }
172
173 let fragments = fragments_from(link, fetch_html)?;
174
175 if fragments.contains(fragment) {
176 return Ok(());
177 }
178
179 match percent_encoding::percent_decode(fragment.as_bytes()).decode_utf8() {
182 Ok(cow) => {
183 if fragments.contains(&*cow) {
184 return Ok(());
185 }
186 }
187 Err(err) => warn!("{} url-decoded to invalid UTF8: {}", fragment, err),
190 }
191
192 static RUST_LINE_HIGLIGHT_RX: Lazy<Regex> =
198 Lazy::new(|| Regex::new(r#"^(?P<start>[0-9]+)-(?P<end>[0-9]+)$"#).unwrap());
199 match RUST_LINE_HIGLIGHT_RX.captures(fragment) {
200 Some(capture) => match (capture.name("start"), capture.name("end")) {
201 (Some(start_str), Some(end_str)) => {
202 let start = start_str.as_str().parse::<i32>().unwrap();
204 let end = end_str.as_str().parse::<i32>().unwrap();
205 let missing = (start..=end)
206 .map(|i| i.to_string())
207 .filter(|i| !fragments.contains(i))
208 .collect::<Vec<String>>();
209 if !missing.is_empty() {
210 Err(CheckError::Fragment(
211 link.clone(),
212 fragment.to_string(),
213 Some(missing),
214 ))
215 } else {
216 Ok(())
217 }
218 }
219 _ => unreachable!("if the regex matches, it should have capture groups"),
220 },
221 None => Err(CheckError::Fragment(
222 link.clone(),
223 fragment.to_string(),
224 None,
225 )),
226 }
227}
228
229fn check_file_url(url: &Url, ctx: &CheckContext) -> Result<(), CheckError> {
231 let path = url.to_file_path().unwrap();
232
233 let index_html;
237 let expanded_path = if path.is_file() {
238 &path
239 } else if path.is_dir() && path.join("index.html").is_file() {
240 index_html = path.join("index.html");
241 &index_html
242 } else {
243 debug!("Linked file at path {} does not exist!", path.display());
244 return Err(CheckError::File(path));
245 };
246
247 if !ctx.check_fragments {
248 return Ok(());
249 }
250
251 match url.fragment() {
254 Some(fragment) => check_file_fragment(&path, expanded_path, fragment),
255 None => Ok(()),
256 }
257}
258
259fn check_file_fragment(
260 path: &Path,
261 expanded_path: &Path,
262 fragment: &str,
263) -> Result<(), CheckError> {
264 debug!(
265 "Checking fragment {} of file {}.",
266 fragment,
267 expanded_path.display()
268 );
269
270 fn get_html(expanded_path: &Path) -> Result<String, CheckError> {
271 read_to_string(expanded_path).map_err(|err| {
272 CheckError::Io(Box::new(IoError::FileIo(
273 expanded_path.to_string_lossy().to_string(),
274 err,
275 )))
276 })
277 }
278
279 let fetch_html = || {
280 let html = get_html(expanded_path)?;
281 if let Some(redirect) = parse_redirect(&html) {
282 get_html(&expanded_path.parent().unwrap().join(redirect))
283 } else {
284 Ok(html)
285 }
286 };
287 is_fragment_available(&Link::File(path.to_path_buf()), fragment, fetch_html)
288}
289
290fn check_http_url(url: &Url, ctx: &CheckContext) -> Result<(), CheckError> {
292 if ctx.check_http == HttpCheck::Ignored {
293 warn!(
294 "Skip checking {} as checking of http URLs is turned off",
295 url
296 );
297 return Ok(());
298 }
299
300 for blacklisted_prefix in PREFIX_BLACKLIST.iter() {
301 if url.as_str().starts_with(blacklisted_prefix) {
302 warn!(
303 "Skip checking {} as URL prefix is on the builtin blacklist",
304 url
305 );
306 return Ok(());
307 }
308 }
309
310 if ctx.check_http == HttpCheck::Forbidden {
311 return Err(CheckError::HttpForbidden(url.clone()));
312 }
313
314 if url.fragment().is_none() || !ctx.check_fragments {
317 match ureq::head(url.as_str()).call() {
318 Err(ureq::Error::Status(405, _)) => {
319 ureq::get(url.as_str()).call()?;
321 Ok(())
322 }
323 Err(other) => Err(other.into()),
324 Ok(_) => Ok(()),
325 }
326 } else {
327 check_http_fragment(url, url.fragment().unwrap())
330 }
331}
332
333fn check_http_fragment(url: &Url, fragment: &str) -> Result<(), CheckError> {
334 debug!("Checking fragment {} of URL {}.", fragment, url.as_str());
335
336 fn get_html(url: &Url) -> Result<String, CheckError> {
337 let resp = ureq::get(url.as_str()).call()?;
338 Ok(resp.into_string().unwrap())
339 }
340
341 let fetch_html = || {
342 let html = get_html(url)?;
343 let redirect = parse_redirect(&html).and_then(|s| {
345 Url::parse(&s)
346 .map_err(|err| {
347 warn!("failed to parse Rustdoc redirect: {}", err);
348 })
349 .ok()
350 });
351 if let Some(redirect) = redirect {
352 get_html(&redirect)
353 } else {
354 Ok(html)
355 }
356 };
357
358 is_fragment_available(&Link::Http(url.clone()), fragment, fetch_html)?;
359 Ok(())
360}
361
/// Tests for the file and HTTP link checks.
///
/// File tests read fixtures below `tests/html/` relative to the current
/// working directory; HTTP tests talk to a `mockito` mock server.
#[cfg(test)]
mod test {
    use crate::HttpCheck;

    use super::{check_file_url, is_available, CheckContext, CheckError, Link};
    use mockito::{self, mock};
    use std::env;
    use url::Url;

    /// Builds a `file:` URL for `path` (relative to the current directory).
    ///
    /// A trailing `/` produces a directory URL, and an optional `#fragment`
    /// suffix becomes the URL's fragment.
    fn url_for(path: &str) -> Url {
        let cwd = env::current_dir().unwrap();
        let mut parts = path.split('#');
        let file_path = parts.next().unwrap();

        let mut url = if file_path.ends_with('/') {
            Url::from_directory_path(cwd.join(file_path))
        } else {
            Url::from_file_path(cwd.join(file_path))
        }
        .unwrap();

        url.set_fragment(parts.next());
        // A test path may contain at most one `#`.
        assert_eq!(parts.count(), 0);

        url
    }

    /// Runs `check_file_url` on `path` with the default context.
    fn test_check_file_url(path: &str) -> Result<(), CheckError> {
        check_file_url(&url_for(path), &CheckContext::default())
    }

    /// Returns a default context with HTTP checking switched on.
    fn http_enabled() -> CheckContext {
        CheckContext {
            check_http: HttpCheck::Enabled,
            ..CheckContext::default()
        }
    }

    #[test]
    fn test_file_path() {
        test_check_file_url("tests/html/index.html").unwrap();
    }

    #[test]
    fn test_directory_path() {
        test_check_file_url("tests/html/").unwrap();
    }

    #[test]
    fn test_anchors() {
        test_check_file_url("tests/html/anchors.html#h1").unwrap();
    }

    #[test]
    fn test_hash_fragment() {
        test_check_file_url("tests/html/anchors.html#").unwrap();
    }

    #[test]
    fn test_missing_anchors() {
        match test_check_file_url("tests/html/anchors.html#nonexistent") {
            Err(CheckError::Fragment(Link::File(path), fragment, None)) => {
                assert!(path.ends_with("tests/html/anchors.html"));
                assert_eq!("nonexistent", fragment);
            }
            x => panic!(
                "Expected to report missing anchor (Err(CheckError::Fragment)), got {:?}",
                x
            ),
        }
    }

    #[test]
    fn test_range_anchor() {
        test_check_file_url("tests/html/range.html#2-4").unwrap();
    }

    #[test]
    fn test_missing_range_anchor() {
        match test_check_file_url("tests/html/range.html#4-6") {
            Err(CheckError::Fragment(Link::File(path), fragment, Some(missing_parts))) => {
                assert!(path.ends_with("tests/html/range.html"));
                assert_eq!("4-6", fragment);
                assert_eq!(missing_parts.len(), 1);
                assert!(missing_parts.contains(&"6".to_string()));
            }
            x => panic!(
                "Expected to report missing range anchor (Err(CheckError::Fragment)), got {:?}",
                x
            ),
        }
    }

    #[test]
    fn test_is_available_file_path() {
        is_available(
            &url_for("tests/html/index.html#i1"),
            &CheckContext::default(),
        )
        .unwrap();
    }

    #[test]
    fn test_is_available_directory_path() {
        is_available(&url_for("tests/html/#i1"), &CheckContext::default()).unwrap();
    }

    #[test]
    fn test_missing_dir_index_fragment() {
        match is_available(
            &url_for("tests/html/missing_index/#i1"),
            &CheckContext::default(),
        ) {
            Err(CheckError::File(path)) => assert!(path.ends_with("tests/html/missing_index")),
            x => panic!(
                "Expected to report missing file (Err(CheckError::File)), got {:?}",
                x
            ),
        }
    }

    #[test]
    fn test_http_check() {
        let root = mock("HEAD", "/test_http_check").with_status(200).create();

        let mut url = mockito::server_url();
        url.push_str("/test_http_check");

        is_available(&Url::parse(&url).unwrap(), &http_enabled()).unwrap();

        root.assert();
    }

    #[test]
    fn test_http_check_fragment() {
        let root = mock("GET", "/test_http_check_fragment")
            .with_status(200)
            .with_header("content-type", "text/html")
            .with_body(
                r#"<!DOCTYPE html>
            <html>
                <body id="r1" />
            </html>"#,
            )
            .create();

        let mut url = mockito::server_url();
        url.push_str("/test_http_check_fragment#r1");

        is_available(&Url::parse(&url).unwrap(), &http_enabled()).unwrap();

        root.assert();
    }

    #[test]
    fn test_missing_http_fragment() {
        let root = mock("GET", "/test_missing_http_fragment")
            .with_status(200)
            .with_header("content-type", "text/html")
            .with_body(
                r#"<!DOCTYPE html>
            <html />"#,
            )
            .create();

        let mut url = mockito::server_url();
        url.push_str("/test_missing_http_fragment#missing");
        let requested = Url::parse(&url).unwrap();

        match is_available(&requested, &http_enabled()) {
            Err(CheckError::Fragment(Link::Http(reported), fragment, None)) => {
                // Compare against the URL that was requested rather than a
                // hard-coded mock server address.
                assert_eq!(requested, reported);
                assert_eq!("missing", fragment);
            }
            x => panic!(
                "Expected to report missing anchor (Err(CheckError::Fragment)), got {:?}",
                x
            ),
        }

        root.assert();
    }

    #[test]
    fn test_disabling_fragment_checks_file() {
        check_file_url(
            &url_for("tests/html/anchors.html#nonexistent"),
            &CheckContext {
                check_fragments: false,
                ..CheckContext::default()
            },
        )
        .unwrap();
    }

    #[test]
    fn test_disabling_fragment_checks_http() {
        // With fragment checks off, only a HEAD request must be issued.
        let root = mock("HEAD", "/test_disabling_fragment_checks_http")
            .with_status(200)
            .create();

        let mut url = mockito::server_url();
        url.push_str("/test_disabling_fragment_checks_http#missing");

        is_available(
            &Url::parse(&url).unwrap(),
            &CheckContext {
                check_http: HttpCheck::Enabled,
                check_fragments: false,
                ..CheckContext::default()
            },
        )
        .unwrap();

        root.assert();
    }
}