lychee_lib/client.rs
1//! Handler of link checking operations.
2//!
3//! This module defines two structs, [`Client`] and [`ClientBuilder`].
4//! `Client` handles incoming requests and returns responses.
5//! `ClientBuilder` exposes a finer level of granularity for building
6//! a `Client`.
7//!
8//! For convenience, a free function [`check`] is provided for ad-hoc
9//! link checks.
10#![allow(
11 clippy::module_name_repetitions,
12 clippy::struct_excessive_bools,
13 clippy::default_trait_access,
14 clippy::used_underscore_binding
15)]
16use std::{collections::HashSet, sync::Arc, time::Duration};
17
18use http::{
19 StatusCode,
20 header::{HeaderMap, HeaderValue},
21};
22use log::debug;
23use octocrab::Octocrab;
24use regex::RegexSet;
25use reqwest::{header, redirect, tls};
26use reqwest_cookie_store::CookieStoreMutex;
27use secrecy::{ExposeSecret, SecretString};
28use typed_builder::TypedBuilder;
29
30use crate::{
31 Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
32 chain::RequestChain,
33 checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
34 filter::Filter,
35 remap::Remaps,
36 types::DEFAULT_ACCEPTED_STATUS_CODES,
37};
38
/// Default number of redirects before a request is deemed as failed, 5.
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default initial wait time in seconds between retries, 1.
///
/// [`ClientBuilder`] converts this to a `Duration` (casting to `u64`) for the
/// default of its `retry_wait_time` field.
pub const DEFAULT_RETRY_WAIT_TIME_SECS: usize = 1;
/// Default timeout in seconds before a request is deemed as failed, 20.
///
/// NOTE(review): this constant is not referenced elsewhere in this file; the
/// `timeout` field of [`ClientBuilder`] defaults to `None`. Presumably it is
/// consumed by callers of this crate -- confirm.
pub const DEFAULT_TIMEOUT_SECS: usize = 20;
/// Default user agent, `lychee/<PKG_VERSION>`.
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
49
// Constants currently not configurable by the user.
/// A timeout, in seconds, for only the connect phase of a [`Client`].
const CONNECT_TIMEOUT: u64 = 10;
/// TCP keepalive duration, in seconds, handed to the underlying HTTP client.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: u64 = 60;
58
/// Builder for [`Client`].
///
/// Unless a field carries its own `#[builder(default ...)]` attribute, it
/// falls back to its type's `Default` value, and every setter accepts any
/// value that converts `Into` the field type.
///
/// See crate-level documentation for usage example.
#[derive(TypedBuilder, Debug, Clone)]
#[builder(field_defaults(default, setter(into)))]
pub struct ClientBuilder {
    /// Optional GitHub token used for GitHub links.
    ///
    /// This allows many more requests before getting rate-limited. An empty
    /// token is treated the same as no token.
    ///
    /// # Rate-limiting Defaults
    ///
    /// As of Feb 2022, it's 60 per hour without a GitHub token vs.
    /// 5000 per hour with a token.
    github_token: Option<SecretString>,

    /// Remap URIs matching a pattern to a different URI.
    ///
    /// This makes it possible to remap any HTTP/HTTPS endpoint to a different
    /// HTTP/HTTPS one. This feature could also be used to proxy
    /// certain requests.
    ///
    /// # Usage Notes
    ///
    /// Use with caution because a large set of remapping rules may cause
    /// performance issues.
    ///
    /// Furthermore, rules are executed sequentially and multiple mappings for
    /// the same URI are allowed, so it is up to the library user's discretion to
    /// make sure rules don't conflict with each other.
    remaps: Option<Remaps>,

    /// Automatically append file extensions to `file://` URIs as needed.
    ///
    /// This option takes effect on `file://` URIs which do not exist.
    fallback_extensions: Vec<String>,

    /// Index file names to use when resolving `file://` URIs which point to
    /// directories.
    ///
    /// For local directory links, if this is non-`None`, then at least one
    /// index file from this list must exist in order for the link to be
    /// considered valid. Index file names are required to match regular
    /// files, aside from the special `.` name which will match the
    /// directory itself.
    ///
    /// If `None`, index file checking is disabled and directory links are valid
    /// as long as the directory exists on disk.
    ///
    /// In the [`ClientBuilder`], this defaults to `None`.
    #[builder(default = None)]
    index_files: Option<Vec<String>>,

    /// Links matching this set of regular expressions are **always** checked.
    ///
    /// This has higher precedence over [`ClientBuilder::excludes`], **but**
    /// has lower precedence compared to any other `exclude_` fields or
    /// [`ClientBuilder::schemes`] below.
    includes: Option<RegexSet>,

    /// Links matching this set of regular expressions are ignored, **except**
    /// when a link also matches against [`ClientBuilder::includes`].
    excludes: Option<RegexSet>,

    /// When `true`, exclude all private network addresses.
    ///
    /// This effectively turns on the following fields:
    /// - [`ClientBuilder::exclude_private_ips`]
    /// - [`ClientBuilder::exclude_link_local_ips`]
    /// - [`ClientBuilder::exclude_loopback_ips`]
    exclude_all_private: bool,

    /// When `true`, exclude private IP addresses.
    ///
    /// # IPv4
    ///
    /// The private address ranges are defined in [IETF RFC 1918] and include:
    ///
    /// - `10.0.0.0/8`
    /// - `172.16.0.0/12`
    /// - `192.168.0.0/16`
    ///
    /// # IPv6
    ///
    /// The address is a unique local address (`fc00::/7`).
    ///
    /// This property is defined in [IETF RFC 4193].
    ///
    /// # Note
    ///
    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully
    /// deprecated in [IETF RFC 3879]. So it is **NOT** considered private for
    /// this purpose.
    ///
    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
    exclude_private_ips: bool,

    /// When `true`, exclude link-local IPs.
    ///
    /// # IPv4
    ///
    /// The address is `169.254.0.0/16`.
    ///
    /// This property is defined by [IETF RFC 3927].
    ///
    /// # IPv6
    ///
    /// The address is a unicast address with link-local scope, as defined in
    /// [RFC 4291].
    ///
    /// A unicast address has link-local scope if it has the prefix `fe80::/10`,
    /// as per [RFC 4291 section 2.4].
    ///
    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
    /// [RFC 4291]: https://tools.ietf.org/html/rfc4291
    /// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
    exclude_link_local_ips: bool,

    /// When `true`, exclude loopback IP addresses.
    ///
    /// # IPv4
    ///
    /// This is a loopback address (`127.0.0.0/8`).
    ///
    /// This property is defined by [IETF RFC 1122].
    ///
    /// # IPv6
    ///
    /// This is the loopback address (`::1`), as defined in
    /// [IETF RFC 4291 section 2.5.3].
    ///
    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
    exclude_loopback_ips: bool,

    /// When `true`, check mail addresses.
    include_mail: bool,

    /// Maximum number of redirects per request before returning an error.
    ///
    /// Defaults to [`DEFAULT_MAX_REDIRECTS`].
    #[builder(default = DEFAULT_MAX_REDIRECTS)]
    max_redirects: usize,

    /// Maximum number of retries per request before returning an error.
    ///
    /// Defaults to [`DEFAULT_MAX_RETRIES`].
    #[builder(default = DEFAULT_MAX_RETRIES)]
    max_retries: u64,

    /// Minimum accepted TLS version.
    ///
    /// When `None`, no minimum is configured on the underlying HTTP client.
    min_tls_version: Option<tls::Version>,

    /// User-agent used for checking links.
    ///
    /// Defaults to [`DEFAULT_USER_AGENT`].
    ///
    /// # Notes
    ///
    /// This may be helpful for bypassing certain firewalls. It replaces any
    /// user-agent supplied through [`ClientBuilder::custom_headers`].
    // Faking the user agent is necessary for some websites, unfortunately.
    // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
    #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
    user_agent: String,

    /// When `true`, accept invalid SSL certificates.
    ///
    /// # Warning
    ///
    /// You should think very carefully before allowing invalid SSL
    /// certificates. It will accept any certificate for any site to be trusted
    /// including expired certificates. This introduces significant
    /// vulnerabilities, and should only be used as a last resort.
    // TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
    allow_insecure: bool,

    /// Set of accepted URL schemes.
    ///
    /// Only links with matched URI schemes are checked. This has no effect when
    /// it's empty.
    schemes: HashSet<String>,

    /// Default [headers] for every request.
    ///
    /// This allows working around validation issues on some websites. See also
    /// [here] for usage examples.
    ///
    /// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
    custom_headers: HeaderMap,

    /// HTTP method used for requests, e.g. `GET` or `HEAD`.
    ///
    /// Defaults to `GET`.
    #[builder(default = reqwest::Method::GET)]
    method: reqwest::Method,

    /// Set of accepted return codes / status codes.
    ///
    /// Unmatched return codes / status codes are deemed as errors.
    ///
    /// Defaults to a copy of [`DEFAULT_ACCEPTED_STATUS_CODES`].
    #[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
    accepted: HashSet<StatusCode>,

    /// Response timeout per request.
    ///
    /// When `None` (the default), no overall request timeout is configured on
    /// the underlying HTTP client; the connect-phase timeout still applies.
    timeout: Option<Duration>,

    /// Base for resolving paths.
    ///
    /// E.g. if the base is `/home/user/` and the path is `file.txt`, the
    /// resolved path would be `/home/user/file.txt`.
    base: Option<Base>,

    /// Initial time between retries of failed requests.
    ///
    /// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
    ///
    /// # Notes
    ///
    /// For each request, the wait time increases using an exponential backoff
    /// mechanism. For example, if the value is 1 second, then it waits for
    /// 2 ^ (N-1) seconds before the N-th retry.
    ///
    /// This prevents spending too much system resources on slow responders and
    /// prioritizes other requests.
    #[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
    retry_wait_time: Duration,

    /// When `true`, requires using HTTPS when it's available.
    ///
    /// This would treat unencrypted links as errors when HTTPS is available.
    /// It has no effect on non-HTTP schemes or if the URL doesn't support
    /// HTTPS.
    require_https: bool,

    /// Cookie store used for requests.
    ///
    /// See <https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store>
    cookie_jar: Option<Arc<CookieStoreMutex>>,

    /// Enable the checking of fragments in links.
    ///
    /// The flag is forwarded to both the website checker and the file checker.
    include_fragments: bool,

    /// Requests run through this chain where each item in the chain
    /// can modify the request. A chained item can also decide to exit
    /// early and return a status, so that subsequent chain items are
    /// skipped and the lychee-internal request chain is not activated.
    plugin_request_chain: RequestChain,
}
308
309impl Default for ClientBuilder {
310 #[inline]
311 fn default() -> Self {
312 Self::builder().build()
313 }
314}
315
316impl ClientBuilder {
317 /// Instantiates a [`Client`].
318 ///
319 /// # Errors
320 ///
321 /// Returns an `Err` if:
322 /// - The user-agent contains characters other than ASCII 32-127.
323 /// - The reqwest client cannot be instantiated. This occurs if a TLS
324 /// backend cannot be initialized or the resolver fails to load the system
325 /// configuration. See [here].
326 /// - The GitHub client cannot be created. Since the implementation also
327 /// uses reqwest under the hood, this errors in the same circumstances as
328 /// the last one.
329 ///
330 /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
331 pub fn client(self) -> Result<Client> {
332 let Self {
333 user_agent,
334 custom_headers: mut headers,
335 ..
336 } = self;
337
338 if let Some(prev_user_agent) =
339 headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
340 {
341 debug!(
342 "Found user-agent in headers: {}. Overriding it with {user_agent}.",
343 prev_user_agent.to_str().unwrap_or("�"),
344 );
345 }
346
347 headers.insert(
348 header::TRANSFER_ENCODING,
349 HeaderValue::from_static("chunked"),
350 );
351
352 // Custom redirect policy to enable logging of redirects.
353 let max_redirects = self.max_redirects;
354 let redirect_policy = redirect::Policy::custom(move |attempt| {
355 if attempt.previous().len() > max_redirects {
356 attempt.error("too many redirects")
357 } else {
358 debug!("Redirecting to {}", attempt.url());
359 attempt.follow()
360 }
361 });
362
363 let mut builder = reqwest::ClientBuilder::new()
364 .gzip(true)
365 .default_headers(headers)
366 .danger_accept_invalid_certs(self.allow_insecure)
367 .connect_timeout(Duration::from_secs(CONNECT_TIMEOUT))
368 .tcp_keepalive(Duration::from_secs(TCP_KEEPALIVE))
369 .redirect(redirect_policy);
370
371 if let Some(cookie_jar) = self.cookie_jar {
372 builder = builder.cookie_provider(cookie_jar);
373 }
374
375 if let Some(min_tls) = self.min_tls_version {
376 builder = builder.min_tls_version(min_tls);
377 }
378
379 let reqwest_client = match self.timeout {
380 Some(t) => builder.timeout(t),
381 None => builder,
382 }
383 .build()
384 .map_err(ErrorKind::BuildRequestClient)?;
385
386 let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
387 Some(token) if !token.is_empty() => Some(
388 Octocrab::builder()
389 .personal_token(token.to_string())
390 .build()
391 // this is essentially the same `reqwest::ClientBuilder::build` error
392 // see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
393 .map_err(|e: octocrab::Error| ErrorKind::BuildGithubClient(Box::new(e)))?,
394 ),
395 _ => None,
396 };
397
398 let filter = Filter {
399 includes: self.includes.map(Into::into),
400 excludes: self.excludes.map(Into::into),
401 schemes: self.schemes,
402 // exclude_all_private option turns on all "private" excludes,
403 // including private IPs, link-local IPs and loopback IPs
404 exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
405 exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
406 exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
407 include_mail: self.include_mail,
408 };
409
410 let website_checker = WebsiteChecker::new(
411 self.method,
412 self.retry_wait_time,
413 self.max_retries,
414 reqwest_client,
415 self.accepted,
416 github_client,
417 self.require_https,
418 self.plugin_request_chain,
419 self.include_fragments,
420 );
421
422 Ok(Client {
423 remaps: self.remaps,
424 filter,
425 email_checker: MailChecker::new(),
426 website_checker,
427 file_checker: FileChecker::new(
428 self.base,
429 self.fallback_extensions,
430 self.index_files,
431 self.include_fragments,
432 ),
433 })
434 }
435}
436
/// Handles incoming requests and returns responses.
///
/// A `Client` is created through [`ClientBuilder::client`].
///
/// See [`ClientBuilder`] which contains sane defaults for all configuration
/// options.
#[derive(Debug, Clone)]
pub struct Client {
    /// Optional remapping rules for URIs matching a pattern.
    remaps: Option<Remaps>,

    /// Rules to decide whether each link should be checked or ignored.
    filter: Filter,

    /// A checker for website URLs.
    website_checker: WebsiteChecker,

    /// A checker for file URLs.
    file_checker: FileChecker,

    /// A checker for email URLs.
    email_checker: MailChecker,
}
458
459impl Client {
460 /// Check a single request.
461 ///
462 /// `request` can be either a [`Request`] or a type that can be converted
463 /// into it. In any case, it must represent a valid URI.
464 ///
465 /// # Errors
466 ///
467 /// Returns an `Err` if:
468 /// - `request` does not represent a valid URI.
469 /// - Encrypted connection for a HTTP URL is available but unused. (Only
470 /// checked when `Client::require_https` is `true`.)
471 #[allow(clippy::missing_panics_doc)]
472 pub async fn check<T, E>(&self, request: T) -> Result<Response>
473 where
474 Request: TryFrom<T, Error = E>,
475 ErrorKind: From<E>,
476 {
477 let Request {
478 ref mut uri,
479 credentials,
480 source,
481 ..
482 } = request.try_into()?;
483
484 // Allow filtering based on element and attribute
485 // if !self.filter.is_allowed(uri) {
486 // return Ok(Response::new(
487 // uri.clone(),
488 // Status::Excluded,
489 // source,
490 // ));
491 // }
492
493 self.remap(uri)?;
494
495 if self.is_excluded(uri) {
496 return Ok(Response::new(uri.clone(), Status::Excluded, source));
497 }
498
499 let status = match uri.scheme() {
500 // We don't check tel: URIs
501 _ if uri.is_tel() => Status::Excluded,
502 _ if uri.is_file() => self.check_file(uri).await,
503 _ if uri.is_mail() => self.check_mail(uri).await,
504 _ => self.check_website(uri, credentials).await?,
505 };
506
507 Ok(Response::new(uri.clone(), status, source))
508 }
509
510 /// Check a single file using the file checker.
511 pub async fn check_file(&self, uri: &Uri) -> Status {
512 self.file_checker.check(uri).await
513 }
514
515 /// Remap `uri` using the client-defined remapping rules.
516 ///
517 /// # Errors
518 ///
519 /// Returns an `Err` if the final, remapped `uri` is not a valid URI.
520 pub fn remap(&self, uri: &mut Uri) -> Result<()> {
521 if let Some(ref remaps) = self.remaps {
522 uri.url = remaps.remap(&uri.url)?;
523 }
524 Ok(())
525 }
526
527 /// Returns whether the given `uri` should be ignored from checking.
528 #[must_use]
529 pub fn is_excluded(&self, uri: &Uri) -> bool {
530 self.filter.is_excluded(uri)
531 }
532
533 /// Checks the given URI of a website.
534 ///
535 /// # Errors
536 ///
537 /// This returns an `Err` if
538 /// - The URI is invalid.
539 /// - The request failed.
540 /// - The response status code is not accepted.
541 /// - The URI cannot be converted to HTTPS.
542 pub async fn check_website(
543 &self,
544 uri: &Uri,
545 credentials: Option<BasicAuthCredentials>,
546 ) -> Result<Status> {
547 self.website_checker.check_website(uri, credentials).await
548 }
549
550 /// Checks a `mailto` URI.
551 pub async fn check_mail(&self, uri: &Uri) -> Status {
552 self.email_checker.check_mail(uri).await
553 }
554}
555
556/// A shorthand function to check a single URI.
557///
558/// This provides the simplest link check utility without having to create a
559/// [`Client`]. For more complex scenarios, see documentation of
560/// [`ClientBuilder`] instead.
561///
562/// # Errors
563///
564/// Returns an `Err` if:
565/// - The request client cannot be built (see [`ClientBuilder::client`] for
566/// failure cases).
567/// - The request cannot be checked (see [`Client::check`] for failure cases).
568pub async fn check<T, E>(request: T) -> Result<Response>
569where
570 Request: TryFrom<T, Error = E>,
571 ErrorKind: From<E>,
572{
573 let client = ClientBuilder::builder().build().client()?;
574 client.check(request).await
575}
576
// Unit tests for `ClientBuilder` / `Client`.
//
// NOTE(review): several tests use real hostnames (github.com, youtube.com,
// badssl.com, crates.io, example.com, authenticationtest.com), so they
// presumably need network access to pass -- confirm before running offline.
#[cfg(test)]
mod tests {
    use std::{
        fs::File,
        time::{Duration, Instant},
    };

    use async_trait::async_trait;
    use http::{StatusCode, header::HeaderMap};
    use reqwest::header;
    use tempfile::tempdir;
    use wiremock::matchers::path;

    use super::ClientBuilder;
    use crate::{
        ErrorKind, Request, Status, Uri,
        chain::{ChainResult, Handler, RequestChain},
        mock_server,
        test_utils::get_mock_client_response,
    };

    /// A mock server answering `404 Not Found` yields an error status.
    #[tokio::test]
    async fn test_nonexistent() {
        let mock_server = mock_server!(StatusCode::NOT_FOUND);
        let res = get_mock_client_response(mock_server.uri()).await;

        assert!(res.status().is_error());
    }

    /// A loopback URL with a path yields an error status.
    #[tokio::test]
    async fn test_nonexistent_with_path() {
        let res = get_mock_client_response("http://127.0.0.1/invalid").await;
        assert!(res.status().is_error());
    }

    /// An existing GitHub repository URL is reported as successful.
    #[tokio::test]
    async fn test_github() {
        let res = get_mock_client_response("https://github.com/lycheeverse/lychee").await;
        assert!(res.status().is_success());
    }

    /// A non-existent GitHub repository URL is reported as an error.
    #[tokio::test]
    async fn test_github_nonexistent_repo() {
        let res = get_mock_client_response("https://github.com/lycheeverse/not-lychee").await;
        assert!(res.status().is_error());
    }

    /// A non-existent file inside an existing GitHub repository is an error.
    #[tokio::test]
    async fn test_github_nonexistent_file() {
        let res = get_mock_client_response(
            "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
        )
        .await;
        assert!(res.status().is_error());
    }

    /// A valid YouTube video URL succeeds while an invalid video id fails.
    #[tokio::test]
    async fn test_youtube() {
        // This is applying a quirk. See the quirks module.
        let res = get_mock_client_response("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
        assert!(res.status().is_success());

        let res = get_mock_client_response("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
        assert!(res.status().is_error());
    }

    /// The same URL answers 401 without credentials and succeeds once basic
    /// auth credentials are attached to the request.
    #[tokio::test]
    async fn test_basic_auth() {
        let mut r: Request = "https://authenticationtest.com/HTTPAuth/"
            .try_into()
            .unwrap();

        let res = get_mock_client_response(r.clone()).await;
        assert_eq!(res.status().code(), Some(401.try_into().unwrap()));

        r.credentials = Some(crate::BasicAuthCredentials {
            username: "user".into(),
            password: "pass".into(),
        });

        let res = get_mock_client_response(r).await;
        assert!(res.status().is_success());
    }

    /// A mock server answering `200 OK` yields a success status.
    #[tokio::test]
    async fn test_non_github() {
        let mock_server = mock_server!(StatusCode::OK);
        let res = get_mock_client_response(mock_server.uri()).await;

        assert!(res.status().is_success());
    }

    /// An expired certificate is an error unless `allow_insecure` is enabled.
    #[tokio::test]
    async fn test_invalid_ssl() {
        let res = get_mock_client_response("https://expired.badssl.com/").await;

        assert!(res.status().is_error());

        // Same, but ignore certificate error
        let res = ClientBuilder::builder()
            .allow_insecure(true)
            .build()
            .client()
            .unwrap()
            .check("https://expired.badssl.com/")
            .await
            .unwrap();
        assert!(res.status().is_success());
    }

    /// A `file://` URI pointing at an existing temporary file succeeds.
    #[tokio::test]
    async fn test_file() {
        let dir = tempdir().unwrap();
        let file = dir.path().join("temp");
        File::create(file).unwrap();
        let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap());

        let res = get_mock_client_response(uri).await;
        assert!(res.status().is_success());
    }

    /// A client built with a custom `Accept` header can check crates.io.
    #[tokio::test]
    async fn test_custom_headers() {
        // See https://github.com/rust-lang/crates.io/issues/788
        let mut custom = HeaderMap::new();
        custom.insert(header::ACCEPT, "text/html".parse().unwrap());
        let res = ClientBuilder::builder()
            .custom_headers(custom)
            .build()
            .client()
            .unwrap()
            .check("https://crates.io/crates/lychee")
            .await
            .unwrap();
        assert!(res.status().is_success());
    }

    /// Mail addresses are excluded when `include_mail` is left at its default.
    #[tokio::test]
    async fn test_exclude_mail_by_default() {
        let client = ClientBuilder::builder()
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));
    }

    /// `include_mail` decides whether a mail address is excluded.
    #[tokio::test]
    async fn test_include_mail() {
        let client = ClientBuilder::builder()
            .include_mail(false)
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));

        let client = ClientBuilder::builder()
            .include_mail(true)
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(!client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));
    }

    /// Despite its name, this asserts that `tel:` URIs are excluded by a
    /// default client.
    #[tokio::test]
    async fn test_include_tel() {
        let client = ClientBuilder::builder().build().client().unwrap();
        assert!(client.is_excluded(&Uri {
            url: "tel:1234567890".try_into().unwrap()
        }));
    }

    /// A plain-HTTP link passes by default but fails with `require_https`.
    #[tokio::test]
    async fn test_require_https() {
        let client = ClientBuilder::builder().build().client().unwrap();
        let res = client.check("http://example.com").await.unwrap();
        assert!(res.status().is_success());

        // Same request will fail if HTTPS is required
        let client = ClientBuilder::builder()
            .require_https(true)
            .build()
            .client()
            .unwrap();
        let res = client.check("http://example.com").await.unwrap();
        assert!(res.status().is_error());
    }

    /// A response slower than the configured timeout yields a timeout status.
    #[tokio::test]
    async fn test_timeout() {
        // Note: this checks response timeout, not connect timeout.
        // To check connect timeout, we'd have to do something more involved,
        // see: https://github.com/LukeMathWalker/wiremock-rs/issues/19
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .build()
            .client()
            .unwrap();

        let res = client.check(mock_server.uri()).await.unwrap();
        assert!(res.status().is_timeout());
    }

    /// Retrying a request that always times out takes roughly the sum of the
    /// exponentially growing wait times.
    #[tokio::test]
    async fn test_exponential_backoff() {
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        // Perform a warm-up request to ensure the lazy regexes
        // in lychee-lib/src/quirks/mod.rs are compiled.
        // On some platforms, this can take some time (approx. 110ms),
        // which should not be counted in the test.
        let warm_up_client = ClientBuilder::builder()
            .max_retries(0_u64)
            .build()
            .client()
            .unwrap();
        let _res = warm_up_client.check(mock_server.uri()).await.unwrap();

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .max_retries(3_u64)
            .retry_wait_time(Duration::from_millis(50))
            .build()
            .client()
            .unwrap();

        // Summary:
        // 1. First request fails with timeout (after 10ms)
        // 2. Retry after 50ms (total 60ms)
        // 3. Second request fails with timeout (after 10ms)
        // 4. Retry after 100ms (total 160ms)
        // 5. Third request fails with timeout (after 10ms)
        // 6. Retry after 200ms (total 360ms)
        // Total: 360ms
        //
        // NOTE(review): the running totals above leave out the 10ms timeouts of
        // the later attempts; the assertion window below absorbs that
        // difference.

        let start = Instant::now();
        let res = client.check(mock_server.uri()).await.unwrap();
        let end = start.elapsed();

        assert!(res.status().is_error());

        // On slow connections this might take a bit longer than the nominal
        // backed-off total, so anything between 350ms and 550ms is accepted.
        assert!((350..=550).contains(&end.as_millis()));
    }

    /// A malformed URL is reported as unsupported instead of panicking.
    #[tokio::test]
    async fn test_avoid_reqwest_panic() {
        let client = ClientBuilder::builder().build().client().unwrap();
        // This request will result in an Unsupported status, but it won't panic
        let res = client.check("http://\"").await.unwrap();

        assert!(matches!(
            res.status(),
            Status::Unsupported(ErrorKind::BuildRequestClient(_))
        ));
        assert!(res.status().is_unsupported());
    }

    /// A single redirect fails with `max_redirects(0)` and succeeds with
    /// `max_redirects(1)`.
    #[tokio::test]
    async fn test_max_redirects() {
        let mock_server = wiremock::MockServer::start().await;

        let ok_uri = format!("{}/ok", &mock_server.uri());
        let redirect_uri = format!("{}/redirect", &mock_server.uri());

        // Set up a single permanent redirect: `/redirect` -> `/ok`
        let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
            .insert_header("Location", ok_uri.as_str());
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(path("/redirect"))
            .respond_with(redirect)
            .mount(&mock_server)
            .await;

        let ok = wiremock::ResponseTemplate::new(StatusCode::OK);
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(path("/ok"))
            .respond_with(ok)
            .mount(&mock_server)
            .await;

        // No redirect allowed: the single hop is already one too many.
        let client = ClientBuilder::builder()
            .max_redirects(0_usize)
            .build()
            .client()
            .unwrap();

        let res = client.check(redirect_uri.clone()).await.unwrap();
        assert!(res.status().is_error());

        // One redirect allowed: the hop to `/ok` is followed.
        let client = ClientBuilder::builder()
            .max_redirects(1_usize)
            .build()
            .client()
            .unwrap();

        let res = client.check(redirect_uri).await.unwrap();
        assert!(res.status().is_success());
    }

    /// A server that always redirects to itself is an error when no redirects
    /// are allowed.
    #[tokio::test]
    async fn test_limit_max_redirects() {
        let mock_server = wiremock::MockServer::start().await;

        // Set up permanent redirect loop
        let template = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
            .insert_header("Location", mock_server.uri().as_str());
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .respond_with(template)
            .mount(&mock_server)
            .await;

        let client = ClientBuilder::builder()
            .max_redirects(0_usize)
            .build()
            .client()
            .unwrap();

        let res = client.check(mock_server.uri()).await.unwrap();
        assert!(res.status().is_error());
    }

    /// URIs with schemes the client cannot check are reported as unsupported.
    #[tokio::test]
    async fn test_unsupported_scheme() {
        let examples = vec![
            "ftp://example.com",
            "gopher://example.com",
            "slack://example.com",
        ];

        for example in examples {
            let client = ClientBuilder::builder().build().client().unwrap();
            let res = client.check(example).await.unwrap();
            assert!(res.status().is_unsupported());
        }
    }

    /// A plugin request chain that finishes early determines the final status.
    #[tokio::test]
    async fn test_chain() {
        use reqwest::Request;

        // Handler that short-circuits every request with `Status::Excluded`.
        #[derive(Debug)]
        struct ExampleHandler();

        #[async_trait]
        impl Handler<Request, Status> for ExampleHandler {
            async fn handle(&mut self, _: Request) -> ChainResult<Request, Status> {
                ChainResult::Done(Status::Excluded)
            }
        }

        let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]);

        let client = ClientBuilder::builder()
            .plugin_request_chain(chain)
            .build()
            .client()
            .unwrap();

        let result = client.check("http://example.com");
        let res = result.await.unwrap();
        assert_eq!(res.status(), &Status::Excluded);
    }
}