lychee_lib/
client.rs

1//! Handler of link checking operations.
2//!
3//! This module defines two structs, [`Client`] and [`ClientBuilder`].
4//! `Client` handles incoming requests and returns responses.
5//! `ClientBuilder` exposes a finer level of granularity for building
6//! a `Client`.
7//!
8//! For convenience, a free function [`check`] is provided for ad-hoc
9//! link checks.
10#![allow(
11    clippy::module_name_repetitions,
12    clippy::struct_excessive_bools,
13    clippy::default_trait_access,
14    clippy::used_underscore_binding
15)]
16use std::{collections::HashSet, sync::Arc, time::Duration};
17
18use http::{
19    StatusCode,
20    header::{HeaderMap, HeaderValue},
21};
22use log::debug;
23use octocrab::Octocrab;
24use regex::RegexSet;
25use reqwest::{header, redirect, tls};
26use reqwest_cookie_store::CookieStoreMutex;
27use secrecy::{ExposeSecret, SecretString};
28use typed_builder::TypedBuilder;
29
30use crate::{
31    Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
32    chain::RequestChain,
33    checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
34    filter::Filter,
35    remap::Remaps,
36    types::{DEFAULT_ACCEPTED_STATUS_CODES, redirect_history::RedirectHistory},
37};
38
/// Default number of redirects before a request is deemed as failed, 5.
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default wait time in seconds between retries, 1.
pub const DEFAULT_RETRY_WAIT_TIME_SECS: usize = 1;
/// Default timeout in seconds before a request is deemed as failed, 20.
///
/// Note: [`ClientBuilder`] in this file does not apply this value on its own;
/// its `timeout` field defaults to `None`.
pub const DEFAULT_TIMEOUT_SECS: usize = 20;
/// Default user agent, `lychee/<PKG_VERSION>`.
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));

// Constants currently not configurable by the user.
/// A timeout, in seconds, for only the connect phase of a [`Client`].
const CONNECT_TIMEOUT: u64 = 10;
/// TCP keepalive, in seconds.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: u64 = 60;
58
/// Builder for [`Client`].
///
/// Every field has a default value, so only the options that differ from the
/// defaults need to be set before calling [`ClientBuilder::client`].
///
/// See crate-level documentation for usage example.
#[derive(TypedBuilder, Debug, Clone)]
#[builder(field_defaults(default, setter(into)))]
pub struct ClientBuilder {
    /// Optional GitHub token used for GitHub links.
    ///
    /// This allows many more requests before getting rate-limited.
    /// An empty token is treated the same as no token.
    ///
    /// # Rate-limiting Defaults
    ///
    /// As of Feb 2022, it's 60 per hour without GitHub token vs.
    /// 5000 per hour with token.
    github_token: Option<SecretString>,

    /// Remap URIs matching a pattern to a different URI.
    ///
    /// This makes it possible to remap any HTTP/HTTPS endpoint to a different
    /// HTTP/HTTPS one. This feature could also be used to proxy
    /// certain requests.
    ///
    /// # Usage Notes
    ///
    /// Use with caution because a large set of remapping rules may cause
    /// performance issues.
    ///
    /// Furthermore rules are executed sequentially and multiple mappings for
    /// the same URI are allowed, so it is up to the library user's discretion to
    /// make sure rules don't conflict with each other.
    remaps: Option<Remaps>,

    /// Automatically append file extensions to `file://` URIs as needed.
    ///
    /// This option takes effect on `file://` URIs which do not exist.
    fallback_extensions: Vec<String>,

    /// Index file names to use when resolving `file://` URIs which point to
    /// directories.
    ///
    /// For local directory links, if this is non-`None`, then at least one
    /// index file from this list must exist in order for the link to be
    /// considered valid. Index file names are required to match regular
    /// files, aside from the special `.` name which will match the
    /// directory itself.
    ///
    /// If `None`, index file checking is disabled and directory links are valid
    /// as long as the directory exists on disk.
    ///
    /// In the [`ClientBuilder`], this defaults to `None`.
    #[builder(default = None)]
    index_files: Option<Vec<String>>,

    /// Links matching this set of regular expressions are **always** checked.
    ///
    /// This has higher precedence over [`ClientBuilder::excludes`], **but**
    /// has lower precedence compared to any other `exclude_` fields or
    /// [`ClientBuilder::schemes`] below.
    includes: Option<RegexSet>,

    /// Links matching this set of regular expressions are ignored, **except**
    /// when a link also matches against [`ClientBuilder::includes`].
    excludes: Option<RegexSet>,

    /// When `true`, exclude all private network addresses.
    ///
    /// This effectively turns on the following fields:
    /// - [`ClientBuilder::exclude_private_ips`]
    /// - [`ClientBuilder::exclude_link_local_ips`]
    /// - [`ClientBuilder::exclude_loopback_ips`]
    exclude_all_private: bool,

    /// When `true`, exclude private IP addresses.
    ///
    /// # IPv4
    ///
    /// The private address ranges are defined in [IETF RFC 1918] and include:
    ///
    ///  - `10.0.0.0/8`
    ///  - `172.16.0.0/12`
    ///  - `192.168.0.0/16`
    ///
    /// # IPv6
    ///
    /// The address is a unique local address (`fc00::/7`).
    ///
    /// This property is defined in [IETF RFC 4193].
    ///
    /// # Note
    ///
    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully
    /// deprecated in [IETF RFC 3879]. So it is **NOT** considered as private
    /// for this purpose.
    ///
    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
    exclude_private_ips: bool,

    /// When `true`, exclude link-local IPs.
    ///
    /// # IPv4
    ///
    /// The address is `169.254.0.0/16`.
    ///
    /// This property is defined by [IETF RFC 3927].
    ///
    /// # IPv6
    ///
    /// The address is a unicast address with link-local scope, as defined in
    /// [RFC 4291].
    ///
    /// A unicast address has link-local scope if it has the prefix `fe80::/10`,
    /// as per [RFC 4291 section 2.4].
    ///
    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
    /// [RFC 4291]: https://tools.ietf.org/html/rfc4291
    /// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
    exclude_link_local_ips: bool,

    /// When `true`, exclude loopback IP addresses.
    ///
    /// # IPv4
    ///
    /// This is a loopback address (`127.0.0.0/8`).
    ///
    /// This property is defined by [IETF RFC 1122].
    ///
    /// # IPv6
    ///
    /// This is the loopback address (`::1`), as defined in
    /// [IETF RFC 4291 section 2.5.3].
    ///
    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
    exclude_loopback_ips: bool,

    /// When `true`, check mail addresses.
    include_mail: bool,

    /// Maximum number of redirects per request before returning an error.
    ///
    /// Defaults to [`DEFAULT_MAX_REDIRECTS`].
    #[builder(default = DEFAULT_MAX_REDIRECTS)]
    max_redirects: usize,

    /// Maximum number of retries per request before returning an error.
    ///
    /// Defaults to [`DEFAULT_MAX_RETRIES`].
    #[builder(default = DEFAULT_MAX_RETRIES)]
    max_retries: u64,

    /// Minimum accepted TLS version.
    ///
    /// When `None`, no minimum version is configured by this builder.
    min_tls_version: Option<tls::Version>,

    /// User-agent used for checking links.
    ///
    /// Defaults to [`DEFAULT_USER_AGENT`].
    ///
    /// # Notes
    ///
    /// This may be helpful for bypassing certain firewalls.
    // Faking the user agent is necessary for some websites, unfortunately.
    // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
    #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
    user_agent: String,

    /// When `true`, accept invalid SSL certificates.
    ///
    /// # Warning
    ///
    /// You should think very carefully before allowing invalid SSL
    /// certificates. It will accept any certificate for any site to be trusted
    /// including expired certificates. This introduces significant
    /// vulnerabilities, and should only be used as a last resort.
    // TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
    allow_insecure: bool,

    /// Set of accepted URL schemes.
    ///
    /// Only links with matched URI schemes are checked. This has no effect when
    /// it's empty.
    schemes: HashSet<String>,

    /// Default [headers] for every request.
    ///
    /// This allows working around validation issues on some websites. See also
    /// [here] for usage examples.
    ///
    /// A `User-Agent` entry in this map is overridden by
    /// [`ClientBuilder::user_agent`] when the client is built.
    ///
    /// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
    custom_headers: HeaderMap,

    /// HTTP method used for requests, e.g. `GET` or `HEAD`.
    ///
    /// Defaults to `GET`.
    #[builder(default = reqwest::Method::GET)]
    method: reqwest::Method,

    /// Set of accepted return codes / status codes.
    ///
    /// Unmatched return codes / status codes are deemed as errors.
    ///
    /// Defaults to [`DEFAULT_ACCEPTED_STATUS_CODES`].
    #[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
    accepted: HashSet<StatusCode>,

    /// Response timeout per request.
    ///
    /// When `None`, no overall request timeout is configured by this builder.
    timeout: Option<Duration>,

    /// Base for resolving paths.
    ///
    /// E.g. if the base is `/home/user/` and the path is `file.txt`, the
    /// resolved path would be `/home/user/file.txt`.
    base: Option<Base>,

    /// Initial time between retries of failed requests.
    ///
    /// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
    ///
    /// # Notes
    ///
    /// For each request, the wait time increases using an exponential backoff
    /// mechanism. For example, if the value is 1 second, then it waits for
    /// 2 ^ (N-1) seconds before the N-th retry.
    ///
    /// This prevents spending too much system resources on slow responders and
    /// prioritizes other requests.
    #[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
    retry_wait_time: Duration,

    /// When `true`, requires using HTTPS when it's available.
    ///
    /// This would treat unencrypted links as errors when HTTPS is available.
    /// It has no effect on non-HTTP schemes or if the URL doesn't support
    /// HTTPS.
    require_https: bool,

    /// Cookie store used for requests.
    ///
    /// See <https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store>
    cookie_jar: Option<Arc<CookieStoreMutex>>,

    /// Enable the checking of fragments in links.
    ///
    /// The flag is handed to both the website checker and the file checker.
    include_fragments: bool,

    /// Requests run through this chain where each item in the chain
    /// can modify the request. A chained item can also decide to exit
    /// early and return a status, so that subsequent chain items are
    /// skipped and the lychee-internal request chain is not activated.
    plugin_request_chain: RequestChain,
}
308
309impl Default for ClientBuilder {
310    #[inline]
311    fn default() -> Self {
312        Self::builder().build()
313    }
314}
315
316impl ClientBuilder {
317    /// Instantiates a [`Client`].
318    ///
319    /// # Errors
320    ///
321    /// Returns an `Err` if:
322    /// - The user-agent contains characters other than ASCII 32-127.
323    /// - The reqwest client cannot be instantiated. This occurs if a TLS
324    ///   backend cannot be initialized or the resolver fails to load the system
325    ///   configuration. See [here].
326    /// - The GitHub client cannot be created. Since the implementation also
327    ///   uses reqwest under the hood, this errors in the same circumstances as
328    ///   the last one.
329    ///
330    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
331    pub fn client(self) -> Result<Client> {
332        let Self {
333            user_agent,
334            custom_headers: mut headers,
335            ..
336        } = self;
337
338        if let Some(prev_user_agent) =
339            headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
340        {
341            debug!(
342                "Found user-agent in headers: {}. Overriding it with {user_agent}.",
343                prev_user_agent.to_str().unwrap_or("�"),
344            );
345        }
346
347        headers.insert(
348            header::TRANSFER_ENCODING,
349            HeaderValue::from_static("chunked"),
350        );
351
352        let redirect_history = RedirectHistory::new();
353
354        let mut builder = reqwest::ClientBuilder::new()
355            .gzip(true)
356            .default_headers(headers)
357            .danger_accept_invalid_certs(self.allow_insecure)
358            .connect_timeout(Duration::from_secs(CONNECT_TIMEOUT))
359            .tcp_keepalive(Duration::from_secs(TCP_KEEPALIVE))
360            .redirect(redirect_policy(
361                redirect_history.clone(),
362                self.max_redirects,
363            ));
364
365        if let Some(cookie_jar) = self.cookie_jar {
366            builder = builder.cookie_provider(cookie_jar);
367        }
368
369        if let Some(min_tls) = self.min_tls_version {
370            builder = builder.min_tls_version(min_tls);
371        }
372
373        let reqwest_client = match self.timeout {
374            Some(t) => builder.timeout(t),
375            None => builder,
376        }
377        .build()
378        .map_err(ErrorKind::BuildRequestClient)?;
379
380        let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
381            Some(token) if !token.is_empty() => Some(
382                Octocrab::builder()
383                    .personal_token(token.to_string())
384                    .build()
385                    // this is essentially the same `reqwest::ClientBuilder::build` error
386                    // see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
387                    .map_err(|e: octocrab::Error| ErrorKind::BuildGithubClient(Box::new(e)))?,
388            ),
389            _ => None,
390        };
391
392        let filter = Filter {
393            includes: self.includes.map(Into::into),
394            excludes: self.excludes.map(Into::into),
395            schemes: self.schemes,
396            // exclude_all_private option turns on all "private" excludes,
397            // including private IPs, link-local IPs and loopback IPs
398            exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
399            exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
400            exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
401            include_mail: self.include_mail,
402        };
403
404        let website_checker = WebsiteChecker::new(
405            self.method,
406            self.retry_wait_time,
407            redirect_history.clone(),
408            self.max_retries,
409            reqwest_client,
410            self.accepted,
411            github_client,
412            self.require_https,
413            self.plugin_request_chain,
414            self.include_fragments,
415        );
416
417        Ok(Client {
418            remaps: self.remaps,
419            filter,
420            email_checker: MailChecker::new(),
421            website_checker,
422            file_checker: FileChecker::new(
423                self.base,
424                self.fallback_extensions,
425                self.index_files,
426                self.include_fragments,
427            ),
428        })
429    }
430}
431
432/// Create our custom [`redirect::Policy`] in order to stop following redirects
433/// once `max_redirects` is reached and to record redirections for reporting.
434fn redirect_policy(redirect_history: RedirectHistory, max_redirects: usize) -> redirect::Policy {
435    redirect::Policy::custom(move |attempt| {
436        if attempt.previous().len() > max_redirects {
437            attempt.stop()
438        } else {
439            let redirects = &[attempt.previous(), &[attempt.url().clone()]].concat();
440            redirect_history.record_redirects(redirects);
441            debug!("Following redirect to {}", attempt.url());
442            attempt.follow()
443        }
444    })
445}
446
/// Handles incoming requests and returns responses.
///
/// A `Client` bundles the remapping rules, the exclusion filter and one
/// checker per supported kind of URI (website, file, mail). It is created
/// through [`ClientBuilder::client`].
///
/// See [`ClientBuilder`] which contains sane defaults for all configuration
/// options.
#[derive(Debug, Clone)]
pub struct Client {
    /// Optional remapping rules for URIs matching pattern.
    ///
    /// When `None`, URIs are left untouched by [`Client::remap`].
    remaps: Option<Remaps>,

    /// Rules to decide whether a given link should be checked or ignored.
    filter: Filter,

    /// A checker for website URLs.
    website_checker: WebsiteChecker,

    /// A checker for file URLs.
    file_checker: FileChecker,

    /// A checker for email URLs.
    email_checker: MailChecker,
}
468
469impl Client {
470    /// Check a single request.
471    ///
472    /// `request` can be either a [`Request`] or a type that can be converted
473    /// into it. In any case, it must represent a valid URI.
474    ///
475    /// # Errors
476    ///
477    /// Returns an `Err` if:
478    /// - `request` does not represent a valid URI.
479    /// - Encrypted connection for a HTTP URL is available but unused. (Only
480    ///   checked when `Client::require_https` is `true`.)
481    #[allow(clippy::missing_panics_doc)]
482    pub async fn check<T, E>(&self, request: T) -> Result<Response>
483    where
484        Request: TryFrom<T, Error = E>,
485        ErrorKind: From<E>,
486    {
487        let Request {
488            ref mut uri,
489            credentials,
490            source,
491            ..
492        } = request.try_into()?;
493
494        self.remap(uri)?;
495
496        if self.is_excluded(uri) {
497            return Ok(Response::new(uri.clone(), Status::Excluded, source));
498        }
499
500        let status = match uri.scheme() {
501            // We don't check tel: URIs
502            _ if uri.is_tel() => Status::Excluded,
503            _ if uri.is_file() => self.check_file(uri).await,
504            _ if uri.is_mail() => self.check_mail(uri).await,
505            _ => self.check_website(uri, credentials).await?,
506        };
507
508        Ok(Response::new(uri.clone(), status, source))
509    }
510
511    /// Check a single file using the file checker.
512    pub async fn check_file(&self, uri: &Uri) -> Status {
513        self.file_checker.check(uri).await
514    }
515
516    /// Remap `uri` using the client-defined remapping rules.
517    ///
518    /// # Errors
519    ///
520    /// Returns an `Err` if the final, remapped `uri` is not a valid URI.
521    pub fn remap(&self, uri: &mut Uri) -> Result<()> {
522        if let Some(ref remaps) = self.remaps {
523            uri.url = remaps.remap(&uri.url)?;
524        }
525        Ok(())
526    }
527
528    /// Returns whether the given `uri` should be ignored from checking.
529    #[must_use]
530    pub fn is_excluded(&self, uri: &Uri) -> bool {
531        self.filter.is_excluded(uri)
532    }
533
534    /// Checks the given URI of a website.
535    ///
536    /// # Errors
537    ///
538    /// This returns an `Err` if
539    /// - The URI is invalid.
540    /// - The request failed.
541    /// - The response status code is not accepted.
542    /// - The URI cannot be converted to HTTPS.
543    pub async fn check_website(
544        &self,
545        uri: &Uri,
546        credentials: Option<BasicAuthCredentials>,
547    ) -> Result<Status> {
548        self.website_checker.check_website(uri, credentials).await
549    }
550
551    /// Checks a `mailto` URI.
552    pub async fn check_mail(&self, uri: &Uri) -> Status {
553        self.email_checker.check_mail(uri).await
554    }
555}
556
557/// A shorthand function to check a single URI.
558///
559/// This provides the simplest link check utility without having to create a
560/// [`Client`]. For more complex scenarios, see documentation of
561/// [`ClientBuilder`] instead.
562///
563/// # Errors
564///
565/// Returns an `Err` if:
566/// - The request client cannot be built (see [`ClientBuilder::client`] for
567///   failure cases).
568/// - The request cannot be checked (see [`Client::check`] for failure cases).
569pub async fn check<T, E>(request: T) -> Result<Response>
570where
571    Request: TryFrom<T, Error = E>,
572    ErrorKind: From<E>,
573{
574    let client = ClientBuilder::builder().build().client()?;
575    client.check(request).await
576}
577
#[cfg(test)]
mod tests {
    use std::{
        fs::File,
        time::{Duration, Instant},
    };

    use async_trait::async_trait;
    use http::{StatusCode, header::HeaderMap};
    use reqwest::header;
    use tempfile::tempdir;
    use test_utils::get_mock_client_response;
    use test_utils::mock_server;
    use test_utils::redirecting_mock_server;
    use wiremock::{
        Mock,
        matchers::{method, path},
    };

    use super::ClientBuilder;
    use crate::{
        ErrorKind, Request, Status, Uri,
        chain::{ChainResult, Handler, RequestChain},
    };

    // A mock server answering 404 must be reported as an error.
    #[tokio::test]
    async fn test_nonexistent() {
        let mock_server = mock_server!(StatusCode::NOT_FOUND);
        let res = get_mock_client_response!(mock_server.uri()).await;

        assert!(res.status().is_error());
    }

    #[tokio::test]
    async fn test_nonexistent_with_path() {
        let res = get_mock_client_response!("http://127.0.0.1/invalid").await;
        assert!(res.status().is_error());
    }

    // NOTE: this and several tests below talk to real hosts on the network.
    #[tokio::test]
    async fn test_github() {
        let res = get_mock_client_response!("https://github.com/lycheeverse/lychee").await;
        assert!(res.status().is_success());
    }

    #[tokio::test]
    async fn test_github_nonexistent_repo() {
        let res = get_mock_client_response!("https://github.com/lycheeverse/not-lychee").await;
        assert!(res.status().is_error());
    }

    #[tokio::test]
    async fn test_github_nonexistent_file() {
        let res = get_mock_client_response!(
            "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
        )
        .await;
        assert!(res.status().is_error());
    }

    #[tokio::test]
    async fn test_youtube() {
        // This is applying a quirk. See the quirks module.
        let res = get_mock_client_response!("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
        assert!(res.status().is_success());

        let res = get_mock_client_response!("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
        assert!(res.status().is_error());
    }

    // Without credentials the endpoint answers 401; with basic-auth
    // credentials attached to the request the check ends in a redirect to 200.
    #[tokio::test]
    async fn test_basic_auth() {
        let mut r: Request = "https://authenticationtest.com/HTTPAuth/"
            .try_into()
            .unwrap();

        let res = get_mock_client_response!(r.clone()).await;
        assert_eq!(res.status().code(), Some(401.try_into().unwrap()));

        r.credentials = Some(crate::BasicAuthCredentials {
            username: "user".into(),
            password: "pass".into(),
        });

        let res = get_mock_client_response!(r).await;
        assert!(matches!(
            res.status(),
            Status::Redirected(StatusCode::OK, _)
        ));
    }

    #[tokio::test]
    async fn test_non_github() {
        let mock_server = mock_server!(StatusCode::OK);
        let res = get_mock_client_response!(mock_server.uri()).await;

        assert!(res.status().is_success());
    }

    #[tokio::test]
    async fn test_invalid_ssl() {
        let res = get_mock_client_response!("https://expired.badssl.com/").await;

        assert!(res.status().is_error());

        // Same, but ignore certificate error
        let res = ClientBuilder::builder()
            .allow_insecure(true)
            .build()
            .client()
            .unwrap()
            .check("https://expired.badssl.com/")
            .await
            .unwrap();
        assert!(res.status().is_success());
    }

    // A `file://` URI pointing at an existing temporary file is valid.
    #[tokio::test]
    async fn test_file() {
        let dir = tempdir().unwrap();
        let file = dir.path().join("temp");
        File::create(file).unwrap();
        let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap());

        let res = get_mock_client_response!(uri).await;
        assert!(res.status().is_success());
    }

    #[tokio::test]
    async fn test_custom_headers() {
        // See https://github.com/rust-lang/crates.io/issues/788
        let mut custom = HeaderMap::new();
        custom.insert(header::ACCEPT, "text/html".parse().unwrap());
        let res = ClientBuilder::builder()
            .custom_headers(custom)
            .build()
            .client()
            .unwrap()
            .check("https://crates.io/crates/lychee")
            .await
            .unwrap();
        assert!(res.status().is_success());
    }

    // `include_mail` is left at its default here, so mail URIs are excluded.
    #[tokio::test]
    async fn test_exclude_mail_by_default() {
        let client = ClientBuilder::builder()
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));
    }

    // Mail URIs are excluded unless `include_mail` is explicitly enabled.
    #[tokio::test]
    async fn test_include_mail() {
        let client = ClientBuilder::builder()
            .include_mail(false)
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));

        let client = ClientBuilder::builder()
            .include_mail(true)
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(!client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));
    }

    // Despite the test name, this asserts that `tel:` URIs are excluded by a
    // default client.
    #[tokio::test]
    async fn test_include_tel() {
        let client = ClientBuilder::builder().build().client().unwrap();
        assert!(client.is_excluded(&Uri {
            url: "tel:1234567890".try_into().unwrap()
        }));
    }

    #[tokio::test]
    async fn test_require_https() {
        let client = ClientBuilder::builder().build().client().unwrap();
        let res = client.check("http://example.com").await.unwrap();
        assert!(res.status().is_success());

        // Same request will fail if HTTPS is required
        let client = ClientBuilder::builder()
            .require_https(true)
            .build()
            .client()
            .unwrap();
        let res = client.check("http://example.com").await.unwrap();
        assert!(res.status().is_error());
    }

    #[tokio::test]
    async fn test_timeout() {
        // Note: this checks response timeout, not connect timeout.
        // To check connect timeout, we'd have to do something more involved,
        // see: https://github.com/LukeMathWalker/wiremock-rs/issues/19
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .build()
            .client()
            .unwrap();

        let res = client.check(mock_server.uri()).await.unwrap();
        assert!(res.status().is_timeout());
    }

    #[tokio::test]
    async fn test_exponential_backoff() {
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        // Perform a warm-up request to ensure the lazy regexes
        // in lychee-lib/src/quirks/mod.rs are compiled.
        // On some platforms, this can take some time (approx. 110ms),
        // which should not be counted in the test.
        let warm_up_client = ClientBuilder::builder()
            .max_retries(0_u64)
            .build()
            .client()
            .unwrap();
        let _res = warm_up_client.check(mock_server.uri()).await.unwrap();

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .max_retries(3_u64)
            .retry_wait_time(Duration::from_millis(50))
            .build()
            .client()
            .unwrap();

        // Summary (running totals are approximate; they leave out the ~10ms
        // spent in the later timed-out attempts):
        // 1. First request fails with timeout (after 10ms)
        // 2. Retry after 50ms (total 60ms)
        // 3. Second request fails with timeout (after 10ms)
        // 4. Retry after 100ms (total 160ms)
        // 5. Third request fails with timeout (after 10ms)
        // 6. Retry after 200ms (total 360ms)
        // Total: 360ms

        let start = Instant::now();
        let res = client.check(mock_server.uri()).await.unwrap();
        let end = start.elapsed();

        assert!(res.status().is_error());

        // On slow connections, this might take a bit longer than the nominal
        // backed-off total (~360ms), so the accepted window is 350ms..=550ms.
        assert!((350..=550).contains(&end.as_millis()));
    }

    #[tokio::test]
    async fn test_avoid_reqwest_panic() {
        let client = ClientBuilder::builder().build().client().unwrap();
        // This request will result in an Unsupported status, but it won't panic
        let res = client.check("http://\"").await.unwrap();

        assert!(matches!(
            res.status(),
            Status::Unsupported(ErrorKind::BuildRequestClient(_))
        ));
        assert!(res.status().is_unsupported());
    }

    #[tokio::test]
    async fn test_max_redirects() {
        let mock_server = wiremock::MockServer::start().await;

        let redirect_uri = format!("{}/redirect", &mock_server.uri());
        let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
            .insert_header("Location", redirect_uri.as_str());

        let redirect_count = 15usize;
        let initial_invocation = 1;

        // Set up infinite redirect loop. The mock expects exactly one initial
        // request plus `redirect_count` followed redirects.
        Mock::given(method("GET"))
            .and(path("/redirect"))
            .respond_with(move |_: &_| redirect.clone())
            .expect(initial_invocation + redirect_count as u64)
            .mount(&mock_server)
            .await;

        let res = ClientBuilder::builder()
            .max_redirects(redirect_count)
            .build()
            .client()
            .unwrap()
            .check(redirect_uri.clone())
            .await
            .unwrap();

        assert_eq!(
            res.status(),
            &Status::Error(ErrorKind::RejectedStatusCode(
                StatusCode::PERMANENT_REDIRECT
            ))
        );
    }

    // A single followed redirect is reported together with the visited URLs.
    #[tokio::test]
    async fn test_redirects() {
        redirecting_mock_server!(async |redirect_url: Url, ok_ur| {
            let res = ClientBuilder::builder()
                .max_redirects(1_usize)
                .build()
                .client()
                .unwrap()
                .check(Uri::from((redirect_url).clone()))
                .await
                .unwrap();

            assert_eq!(
                res.status(),
                &Status::Redirected(StatusCode::OK, vec![redirect_url, ok_ur].into())
            );
        })
        .await;
    }

    #[tokio::test]
    async fn test_unsupported_scheme() {
        let examples = vec![
            "ftp://example.com",
            "gopher://example.com",
            "slack://example.com",
        ];

        for example in examples {
            let client = ClientBuilder::builder().build().client().unwrap();
            let res = client.check(example).await.unwrap();
            assert!(res.status().is_unsupported());
        }
    }

    // A plugin chain item that returns `Done` short-circuits the check with
    // the status it provides.
    #[tokio::test]
    async fn test_chain() {
        use reqwest::Request;

        #[derive(Debug)]
        struct ExampleHandler();

        #[async_trait]
        impl Handler<Request, Status> for ExampleHandler {
            async fn handle(&mut self, _: Request) -> ChainResult<Request, Status> {
                ChainResult::Done(Status::Excluded)
            }
        }

        let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]);

        let client = ClientBuilder::builder()
            .plugin_request_chain(chain)
            .build()
            .client()
            .unwrap();

        let result = client.check("http://example.com");
        let res = result.await.unwrap();
        assert_eq!(res.status(), &Status::Excluded);
    }
}