lychee_lib/
client.rs

1//! Handler of link checking operations.
2//!
3//! This module defines two structs, [`Client`] and [`ClientBuilder`].
4//! `Client` handles incoming requests and returns responses.
5//! `ClientBuilder` exposes a finer level of granularity for building
6//! a `Client`.
7//!
8//! For convenience, a free function [`check`] is provided for ad-hoc
9//! link checks.
10#![allow(
11    clippy::module_name_repetitions,
12    clippy::struct_excessive_bools,
13    clippy::default_trait_access,
14    clippy::used_underscore_binding
15)]
16use std::{collections::HashSet, sync::Arc, time::Duration};
17
18use http::{
19    StatusCode,
20    header::{HeaderMap, HeaderValue},
21};
22use log::debug;
23use octocrab::Octocrab;
24use regex::RegexSet;
25use reqwest::{header, redirect, tls};
26use reqwest_cookie_store::CookieStoreMutex;
27use secrecy::{ExposeSecret, SecretString};
28use typed_builder::TypedBuilder;
29
30use crate::{
31    Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
32    chain::RequestChain,
33    checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
34    filter::Filter,
35    remap::Remaps,
36    types::DEFAULT_ACCEPTED_STATUS_CODES,
37};
38
/// Default number of redirects before a request is deemed as failed, 5.
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default wait time in seconds between retries, 1.
pub const DEFAULT_RETRY_WAIT_TIME_SECS: usize = 1;
/// Default timeout in seconds before a request is deemed as failed, 20.
pub const DEFAULT_TIMEOUT_SECS: usize = 20;
/// Default user agent, `lychee/<PKG_VERSION>`.
///
/// The version is taken from `CARGO_PKG_VERSION` at compile time.
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
49
// Constants currently not configurable by the user.
/// A timeout, in seconds, for only the connect phase of a [`Client`].
const CONNECT_TIMEOUT: u64 = 10;
/// TCP keepalive duration, in seconds.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: u64 = 60;
58
/// Builder for [`Client`].
///
/// Every field has a default, so only the options that differ from the
/// defaults need to be set before calling [`ClientBuilder::client`].
///
/// See crate-level documentation for usage example.
#[derive(TypedBuilder, Debug, Clone)]
#[builder(field_defaults(default, setter(into)))]
pub struct ClientBuilder {
    /// Optional GitHub token used for GitHub links.
    ///
    /// This allows many more requests before getting rate-limited.
    /// An empty token is treated the same as no token.
    ///
    /// # Rate-limiting Defaults
    ///
    /// As of Feb 2022, it's 60 per hour without a GitHub token vs.
    /// 5000 per hour with a token.
    github_token: Option<SecretString>,

    /// Remap URIs matching a pattern to a different URI.
    ///
    /// This makes it possible to remap any HTTP/HTTPS endpoint to a different
    /// HTTP/HTTPS one. This feature could also be used to proxy
    /// certain requests.
    ///
    /// # Usage Notes
    ///
    /// Use with caution because a large set of remapping rules may cause
    /// performance issues.
    ///
    /// Furthermore, rules are executed sequentially and multiple mappings for
    /// the same URI are allowed, so it is up to the library user's discretion to
    /// make sure rules don't conflict with each other.
    remaps: Option<Remaps>,

    /// Automatically append file extensions to `file://` URIs as needed.
    ///
    /// This option takes effect on `file://` URIs which do not exist.
    fallback_extensions: Vec<String>,

    /// Index file names to use when resolving `file://` URIs which point to
    /// directories.
    ///
    /// For local directory links, if this is non-`None`, then at least one
    /// index file from this list must exist in order for the link to be
    /// considered valid. Index file names are required to match regular
    /// files, aside from the special `.` name which will match the
    /// directory itself.
    ///
    /// If `None`, index file checking is disabled and directory links are valid
    /// as long as the directory exists on disk.
    ///
    /// In the [`ClientBuilder`], this defaults to `None`.
    #[builder(default = None)]
    index_files: Option<Vec<String>>,

    /// Links matching this set of regular expressions are **always** checked.
    ///
    /// This has higher precedence over [`ClientBuilder::excludes`], **but**
    /// has lower precedence compared to any other `exclude_` fields or
    /// [`ClientBuilder::schemes`] below.
    includes: Option<RegexSet>,

    /// Links matching this set of regular expressions are ignored, **except**
    /// when a link also matches against [`ClientBuilder::includes`].
    excludes: Option<RegexSet>,

    /// When `true`, exclude all private network addresses.
    ///
    /// This effectively turns on the following fields:
    /// - [`ClientBuilder::exclude_private_ips`]
    /// - [`ClientBuilder::exclude_link_local_ips`]
    /// - [`ClientBuilder::exclude_loopback_ips`]
    exclude_all_private: bool,

    /// When `true`, exclude private IP addresses.
    ///
    /// # IPv4
    ///
    /// The private address ranges are defined in [IETF RFC 1918] and include:
    ///
    ///  - `10.0.0.0/8`
    ///  - `172.16.0.0/12`
    ///  - `192.168.0.0/16`
    ///
    /// # IPv6
    ///
    /// The address is a unique local address (`fc00::/7`).
    ///
    /// This property is defined in [IETF RFC 4193].
    ///
    /// # Note
    ///
    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully
    /// deprecated in [IETF RFC 3879]. So it is **NOT** considered as private for
    /// this purpose.
    ///
    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
    exclude_private_ips: bool,

    /// When `true`, exclude link-local IPs.
    ///
    /// # IPv4
    ///
    /// The address is `169.254.0.0/16`.
    ///
    /// This property is defined by [IETF RFC 3927].
    ///
    /// # IPv6
    ///
    /// The address is a unicast address with link-local scope, as defined in
    /// [RFC 4291].
    ///
    /// A unicast address has link-local scope if it has the prefix `fe80::/10`,
    /// as per [RFC 4291 section 2.4].
    ///
    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
    /// [RFC 4291]: https://tools.ietf.org/html/rfc4291
    /// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
    exclude_link_local_ips: bool,

    /// When `true`, exclude loopback IP addresses.
    ///
    /// # IPv4
    ///
    /// This is a loopback address (`127.0.0.0/8`).
    ///
    /// This property is defined by [IETF RFC 1122].
    ///
    /// # IPv6
    ///
    /// This is the loopback address (`::1`), as defined in
    /// [IETF RFC 4291 section 2.5.3].
    ///
    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
    exclude_loopback_ips: bool,

    /// When `true`, check mail addresses.
    include_mail: bool,

    /// Maximum number of redirects per request before returning an error.
    ///
    /// Defaults to [`DEFAULT_MAX_REDIRECTS`].
    #[builder(default = DEFAULT_MAX_REDIRECTS)]
    max_redirects: usize,

    /// Maximum number of retries per request before returning an error.
    ///
    /// Defaults to [`DEFAULT_MAX_RETRIES`].
    #[builder(default = DEFAULT_MAX_RETRIES)]
    max_retries: u64,

    /// Minimum accepted TLS version.
    ///
    /// When `None`, no minimum version is set on the HTTP client.
    min_tls_version: Option<tls::Version>,

    /// User-agent used for checking links.
    ///
    /// Defaults to [`DEFAULT_USER_AGENT`].
    ///
    /// # Notes
    ///
    /// This may be helpful for bypassing certain firewalls.
    // Faking the user agent is necessary for some websites, unfortunately.
    // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
    #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
    user_agent: String,

    /// When `true`, accept invalid SSL certificates.
    ///
    /// # Warning
    ///
    /// You should think very carefully before allowing invalid SSL
    /// certificates. It will accept any certificate for any site to be trusted
    /// including expired certificates. This introduces significant
    /// vulnerabilities, and should only be used as a last resort.
    // TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
    allow_insecure: bool,

    /// Set of accepted URL schemes.
    ///
    /// Only links with matched URI schemes are checked. This has no effect when
    /// it's empty.
    schemes: HashSet<String>,

    /// Default [headers] for every request.
    ///
    /// This allows working around validation issues on some websites. See also
    /// [here] for usage examples.
    ///
    /// A `User-Agent` entry in this map is overridden by
    /// [`ClientBuilder::user_agent`] when the client is built.
    ///
    /// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
    custom_headers: HeaderMap,

    /// HTTP method used for requests, e.g. `GET` or `HEAD`.
    ///
    /// Defaults to `GET`.
    #[builder(default = reqwest::Method::GET)]
    method: reqwest::Method,

    /// Set of accepted return codes / status codes.
    ///
    /// Unmatched return codes / status codes are deemed as errors.
    ///
    /// Defaults to a copy of `DEFAULT_ACCEPTED_STATUS_CODES`.
    #[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
    accepted: HashSet<StatusCode>,

    /// Response timeout per request.
    ///
    /// When `None`, no response timeout is set on the HTTP client.
    timeout: Option<Duration>,

    /// Base for resolving paths.
    ///
    /// E.g. if the base is `/home/user/` and the path is `file.txt`, the
    /// resolved path would be `/home/user/file.txt`.
    base: Option<Base>,

    /// Initial time between retries of failed requests.
    ///
    /// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
    ///
    /// # Notes
    ///
    /// For each request, the wait time increases using an exponential backoff
    /// mechanism. For example, if the value is 1 second, then it waits for
    /// 2 ^ (N-1) seconds before the N-th retry.
    ///
    /// This prevents spending too much system resources on slow responders and
    /// prioritizes other requests.
    #[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
    retry_wait_time: Duration,

    /// When `true`, requires using HTTPS when it's available.
    ///
    /// This would treat unencrypted links as errors when HTTPS is available.
    /// It has no effect on non-HTTP schemes or if the URL doesn't support
    /// HTTPS.
    require_https: bool,

    /// Cookie store used for requests.
    ///
    /// See <https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store>
    cookie_jar: Option<Arc<CookieStoreMutex>>,

    /// Enable the checking of fragments in links.
    ///
    /// The flag is handed to both the website checker and the file checker.
    include_fragments: bool,

    /// Requests run through this chain where each item in the chain
    /// can modify the request. A chained item can also decide to exit
    /// early and return a status, so that subsequent chain items are
    /// skipped and the lychee-internal request chain is not activated.
    plugin_request_chain: RequestChain,
}
308
309impl Default for ClientBuilder {
310    #[inline]
311    fn default() -> Self {
312        Self::builder().build()
313    }
314}
315
316impl ClientBuilder {
317    /// Instantiates a [`Client`].
318    ///
319    /// # Errors
320    ///
321    /// Returns an `Err` if:
322    /// - The user-agent contains characters other than ASCII 32-127.
323    /// - The reqwest client cannot be instantiated. This occurs if a TLS
324    ///   backend cannot be initialized or the resolver fails to load the system
325    ///   configuration. See [here].
326    /// - The GitHub client cannot be created. Since the implementation also
327    ///   uses reqwest under the hood, this errors in the same circumstances as
328    ///   the last one.
329    ///
330    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
331    pub fn client(self) -> Result<Client> {
332        let Self {
333            user_agent,
334            custom_headers: mut headers,
335            ..
336        } = self;
337
338        if let Some(prev_user_agent) =
339            headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
340        {
341            debug!(
342                "Found user-agent in headers: {}. Overriding it with {user_agent}.",
343                prev_user_agent.to_str().unwrap_or("�"),
344            );
345        }
346
347        headers.insert(
348            header::TRANSFER_ENCODING,
349            HeaderValue::from_static("chunked"),
350        );
351
352        // Custom redirect policy to enable logging of redirects.
353        let max_redirects = self.max_redirects;
354        let redirect_policy = redirect::Policy::custom(move |attempt| {
355            if attempt.previous().len() > max_redirects {
356                attempt.error("too many redirects")
357            } else {
358                debug!("Redirecting to {}", attempt.url());
359                attempt.follow()
360            }
361        });
362
363        let mut builder = reqwest::ClientBuilder::new()
364            .gzip(true)
365            .default_headers(headers)
366            .danger_accept_invalid_certs(self.allow_insecure)
367            .connect_timeout(Duration::from_secs(CONNECT_TIMEOUT))
368            .tcp_keepalive(Duration::from_secs(TCP_KEEPALIVE))
369            .redirect(redirect_policy);
370
371        if let Some(cookie_jar) = self.cookie_jar {
372            builder = builder.cookie_provider(cookie_jar);
373        }
374
375        if let Some(min_tls) = self.min_tls_version {
376            builder = builder.min_tls_version(min_tls);
377        }
378
379        let reqwest_client = match self.timeout {
380            Some(t) => builder.timeout(t),
381            None => builder,
382        }
383        .build()
384        .map_err(ErrorKind::BuildRequestClient)?;
385
386        let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
387            Some(token) if !token.is_empty() => Some(
388                Octocrab::builder()
389                    .personal_token(token.to_string())
390                    .build()
391                    // this is essentially the same `reqwest::ClientBuilder::build` error
392                    // see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
393                    .map_err(|e: octocrab::Error| ErrorKind::BuildGithubClient(Box::new(e)))?,
394            ),
395            _ => None,
396        };
397
398        let filter = Filter {
399            includes: self.includes.map(Into::into),
400            excludes: self.excludes.map(Into::into),
401            schemes: self.schemes,
402            // exclude_all_private option turns on all "private" excludes,
403            // including private IPs, link-local IPs and loopback IPs
404            exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
405            exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
406            exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
407            include_mail: self.include_mail,
408        };
409
410        let website_checker = WebsiteChecker::new(
411            self.method,
412            self.retry_wait_time,
413            self.max_retries,
414            reqwest_client,
415            self.accepted,
416            github_client,
417            self.require_https,
418            self.plugin_request_chain,
419            self.include_fragments,
420        );
421
422        Ok(Client {
423            remaps: self.remaps,
424            filter,
425            email_checker: MailChecker::new(),
426            website_checker,
427            file_checker: FileChecker::new(
428                self.base,
429                self.fallback_extensions,
430                self.index_files,
431                self.include_fragments,
432            ),
433        })
434    }
435}
436
/// Handles incoming requests and returns responses.
///
/// A `Client` bundles the remapping rules, the link filter and one checker
/// per kind of URI (website, file, mail).
///
/// See [`ClientBuilder`] which contains sane defaults for all configuration
/// options.
#[derive(Debug, Clone)]
pub struct Client {
    /// Optional remapping rules for URIs matching a pattern.
    remaps: Option<Remaps>,

    /// Rules to decide whether each link should be checked or ignored.
    filter: Filter,

    /// A checker for website URLs.
    website_checker: WebsiteChecker,

    /// A checker for file URLs.
    file_checker: FileChecker,

    /// A checker for email URLs.
    email_checker: MailChecker,
}
458
459impl Client {
460    /// Check a single request.
461    ///
462    /// `request` can be either a [`Request`] or a type that can be converted
463    /// into it. In any case, it must represent a valid URI.
464    ///
465    /// # Errors
466    ///
467    /// Returns an `Err` if:
468    /// - `request` does not represent a valid URI.
469    /// - Encrypted connection for a HTTP URL is available but unused. (Only
470    ///   checked when `Client::require_https` is `true`.)
471    #[allow(clippy::missing_panics_doc)]
472    pub async fn check<T, E>(&self, request: T) -> Result<Response>
473    where
474        Request: TryFrom<T, Error = E>,
475        ErrorKind: From<E>,
476    {
477        let Request {
478            ref mut uri,
479            credentials,
480            source,
481            ..
482        } = request.try_into()?;
483
484        // Allow filtering based on element and attribute
485        // if !self.filter.is_allowed(uri) {
486        //     return Ok(Response::new(
487        //         uri.clone(),
488        //         Status::Excluded,
489        //         source,
490        //     ));
491        // }
492
493        self.remap(uri)?;
494
495        if self.is_excluded(uri) {
496            return Ok(Response::new(uri.clone(), Status::Excluded, source));
497        }
498
499        let status = match uri.scheme() {
500            // We don't check tel: URIs
501            _ if uri.is_tel() => Status::Excluded,
502            _ if uri.is_file() => self.check_file(uri).await,
503            _ if uri.is_mail() => self.check_mail(uri).await,
504            _ => self.check_website(uri, credentials).await?,
505        };
506
507        Ok(Response::new(uri.clone(), status, source))
508    }
509
510    /// Check a single file using the file checker.
511    pub async fn check_file(&self, uri: &Uri) -> Status {
512        self.file_checker.check(uri).await
513    }
514
515    /// Remap `uri` using the client-defined remapping rules.
516    ///
517    /// # Errors
518    ///
519    /// Returns an `Err` if the final, remapped `uri` is not a valid URI.
520    pub fn remap(&self, uri: &mut Uri) -> Result<()> {
521        if let Some(ref remaps) = self.remaps {
522            uri.url = remaps.remap(&uri.url)?;
523        }
524        Ok(())
525    }
526
527    /// Returns whether the given `uri` should be ignored from checking.
528    #[must_use]
529    pub fn is_excluded(&self, uri: &Uri) -> bool {
530        self.filter.is_excluded(uri)
531    }
532
533    /// Checks the given URI of a website.
534    ///
535    /// # Errors
536    ///
537    /// This returns an `Err` if
538    /// - The URI is invalid.
539    /// - The request failed.
540    /// - The response status code is not accepted.
541    /// - The URI cannot be converted to HTTPS.
542    pub async fn check_website(
543        &self,
544        uri: &Uri,
545        credentials: Option<BasicAuthCredentials>,
546    ) -> Result<Status> {
547        self.website_checker.check_website(uri, credentials).await
548    }
549
550    /// Checks a `mailto` URI.
551    pub async fn check_mail(&self, uri: &Uri) -> Status {
552        self.email_checker.check_mail(uri).await
553    }
554}
555
556/// A shorthand function to check a single URI.
557///
558/// This provides the simplest link check utility without having to create a
559/// [`Client`]. For more complex scenarios, see documentation of
560/// [`ClientBuilder`] instead.
561///
562/// # Errors
563///
564/// Returns an `Err` if:
565/// - The request client cannot be built (see [`ClientBuilder::client`] for
566///   failure cases).
567/// - The request cannot be checked (see [`Client::check`] for failure cases).
568pub async fn check<T, E>(request: T) -> Result<Response>
569where
570    Request: TryFrom<T, Error = E>,
571    ErrorKind: From<E>,
572{
573    let client = ClientBuilder::builder().build().client()?;
574    client.check(request).await
575}
576
#[cfg(test)]
mod tests {
    use std::{
        fs::File,
        time::{Duration, Instant},
    };

    use async_trait::async_trait;
    use http::{StatusCode, header::HeaderMap};
    use reqwest::header;
    use tempfile::tempdir;
    use wiremock::matchers::path;

    use super::ClientBuilder;
    use crate::{
        ErrorKind, Request, Status, Uri,
        chain::{ChainResult, Handler, RequestChain},
        mock_server,
        test_utils::get_mock_client_response,
    };

    /// A mocked `404 Not Found` response is reported as an error.
    #[tokio::test]
    async fn test_nonexistent() {
        let mock_server = mock_server!(StatusCode::NOT_FOUND);
        let res = get_mock_client_response(mock_server.uri()).await;

        assert!(res.status().is_error());
    }

    /// Checking `http://127.0.0.1/invalid` is expected to yield an error.
    #[tokio::test]
    async fn test_nonexistent_with_path() {
        let res = get_mock_client_response("http://127.0.0.1/invalid").await;
        assert!(res.status().is_error());
    }

    /// The lychee repository on GitHub is expected to be reachable.
    #[tokio::test]
    async fn test_github() {
        let res = get_mock_client_response("https://github.com/lycheeverse/lychee").await;
        assert!(res.status().is_success());
    }

    /// A GitHub repository URL that is expected not to exist yields an error.
    #[tokio::test]
    async fn test_github_nonexistent_repo() {
        let res = get_mock_client_response("https://github.com/lycheeverse/not-lychee").await;
        assert!(res.status().is_error());
    }

    /// A GitHub file URL that is expected not to exist yields an error.
    #[tokio::test]
    async fn test_github_nonexistent_file() {
        let res = get_mock_client_response(
            "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
        )
        .await;
        assert!(res.status().is_error());
    }

    /// A valid YouTube video link succeeds while an invalid video id fails.
    #[tokio::test]
    async fn test_youtube() {
        // This is applying a quirk. See the quirks module.
        let res = get_mock_client_response("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
        assert!(res.status().is_success());

        let res = get_mock_client_response("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
        assert!(res.status().is_error());
    }

    /// Without credentials the request is answered with 401; with basic auth
    /// credentials attached it succeeds.
    #[tokio::test]
    async fn test_basic_auth() {
        let mut r: Request = "https://authenticationtest.com/HTTPAuth/"
            .try_into()
            .unwrap();

        let res = get_mock_client_response(r.clone()).await;
        assert_eq!(res.status().code(), Some(401.try_into().unwrap()));

        r.credentials = Some(crate::BasicAuthCredentials {
            username: "user".into(),
            password: "pass".into(),
        });

        let res = get_mock_client_response(r).await;
        assert!(res.status().is_success());
    }

    /// A mocked `200 OK` response is reported as a success.
    #[tokio::test]
    async fn test_non_github() {
        let mock_server = mock_server!(StatusCode::OK);
        let res = get_mock_client_response(mock_server.uri()).await;

        assert!(res.status().is_success());
    }

    /// An expired certificate is an error unless `allow_insecure` is set.
    #[tokio::test]
    async fn test_invalid_ssl() {
        let res = get_mock_client_response("https://expired.badssl.com/").await;

        assert!(res.status().is_error());

        // Same, but ignore certificate error
        let res = ClientBuilder::builder()
            .allow_insecure(true)
            .build()
            .client()
            .unwrap()
            .check("https://expired.badssl.com/")
            .await
            .unwrap();
        assert!(res.status().is_success());
    }

    /// A `file://` URI pointing to an existing temporary file succeeds.
    #[tokio::test]
    async fn test_file() {
        let dir = tempdir().unwrap();
        let file = dir.path().join("temp");
        File::create(file).unwrap();
        let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap());

        let res = get_mock_client_response(uri).await;
        assert!(res.status().is_success());
    }

    /// A client configured with a custom `Accept` header can check crates.io.
    #[tokio::test]
    async fn test_custom_headers() {
        // See https://github.com/rust-lang/crates.io/issues/788
        let mut custom = HeaderMap::new();
        custom.insert(header::ACCEPT, "text/html".parse().unwrap());
        let res = ClientBuilder::builder()
            .custom_headers(custom)
            .build()
            .client()
            .unwrap()
            .check("https://crates.io/crates/lychee")
            .await
            .unwrap();
        assert!(res.status().is_success());
    }

    /// Mail addresses are excluded when `include_mail` is left at its default.
    #[tokio::test]
    async fn test_exclude_mail_by_default() {
        let client = ClientBuilder::builder()
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));
    }

    /// Mail addresses are excluded with `include_mail(false)` and not
    /// excluded with `include_mail(true)`.
    #[tokio::test]
    async fn test_include_mail() {
        let client = ClientBuilder::builder()
            .include_mail(false)
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));

        let client = ClientBuilder::builder()
            .include_mail(true)
            .exclude_all_private(true)
            .build()
            .client()
            .unwrap();
        assert!(!client.is_excluded(&Uri {
            url: "mailto://mail@example.com".try_into().unwrap()
        }));
    }

    /// Despite its name, this asserts that a default client *excludes*
    /// `tel:` URIs.
    #[tokio::test]
    async fn test_include_tel() {
        let client = ClientBuilder::builder().build().client().unwrap();
        assert!(client.is_excluded(&Uri {
            url: "tel:1234567890".try_into().unwrap()
        }));
    }

    /// A plain HTTP link succeeds by default but is an error once
    /// `require_https` is enabled.
    #[tokio::test]
    async fn test_require_https() {
        let client = ClientBuilder::builder().build().client().unwrap();
        let res = client.check("http://example.com").await.unwrap();
        assert!(res.status().is_success());

        // Same request will fail if HTTPS is required
        let client = ClientBuilder::builder()
            .require_https(true)
            .build()
            .client()
            .unwrap();
        let res = client.check("http://example.com").await.unwrap();
        assert!(res.status().is_error());
    }

    /// A response delayed beyond the configured timeout is reported as a
    /// timeout.
    #[tokio::test]
    async fn test_timeout() {
        // Note: this checks response timeout, not connect timeout.
        // To check connect timeout, we'd have to do something more involved,
        // see: https://github.com/LukeMathWalker/wiremock-rs/issues/19
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .build()
            .client()
            .unwrap();

        let res = client.check(mock_server.uri()).await.unwrap();
        assert!(res.status().is_timeout());
    }

    /// Retries of a request that keeps timing out are spaced out with an
    /// increasing wait time; the total elapsed time is checked against a
    /// window of 350 to 550 milliseconds.
    #[tokio::test]
    async fn test_exponential_backoff() {
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        // Perform a warm-up request to ensure the lazy regexes
        // in lychee-lib/src/quirks/mod.rs are compiled.
        // On some platforms, this can take some time (approx. 110ms),
        // which should not be counted in the test.
        let warm_up_client = ClientBuilder::builder()
            .max_retries(0_u64)
            .build()
            .client()
            .unwrap();
        let _res = warm_up_client.check(mock_server.uri()).await.unwrap();

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .max_retries(3_u64)
            .retry_wait_time(Duration::from_millis(50))
            .build()
            .client()
            .unwrap();

        // Summary:
        // 1. First request fails with timeout (after 10ms)
        // 2. Retry after 50ms (total 60ms)
        // 3. Second request fails with timeout (after 10ms)
        // 4. Retry after 100ms (total 160ms)
        // 5. Third request fails with timeout (after 10ms)
        // 6. Retry after 200ms (total 360ms)
        // Total: 360ms

        let start = Instant::now();
        let res = client.check(mock_server.uri()).await.unwrap();
        let end = start.elapsed();

        assert!(res.status().is_error());

        // On slow connections, this might take a bit longer than the nominal
        // backed-off total, hence the tolerance window of 350 to 550 milliseconds.
        assert!((350..=550).contains(&end.as_millis()));
    }

    /// An invalid URL results in an `Unsupported` status instead of a panic.
    #[tokio::test]
    async fn test_avoid_reqwest_panic() {
        let client = ClientBuilder::builder().build().client().unwrap();
        // This request will result in an Unsupported status, but it won't panic
        let res = client.check("http://\"").await.unwrap();

        assert!(matches!(
            res.status(),
            Status::Unsupported(ErrorKind::BuildRequestClient(_))
        ));
        assert!(res.status().is_unsupported());
    }

    /// A single redirect is rejected with `max_redirects(0)` and followed
    /// with `max_redirects(1)`.
    #[tokio::test]
    async fn test_max_redirects() {
        let mock_server = wiremock::MockServer::start().await;

        let ok_uri = format!("{}/ok", &mock_server.uri());
        let redirect_uri = format!("{}/redirect", &mock_server.uri());

        // Set up a permanent redirect from `/redirect` to `/ok`
        let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
            .insert_header("Location", ok_uri.as_str());
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(path("/redirect"))
            .respond_with(redirect)
            .mount(&mock_server)
            .await;

        // `/ok` answers with a plain 200
        let ok = wiremock::ResponseTemplate::new(StatusCode::OK);
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(path("/ok"))
            .respond_with(ok)
            .mount(&mock_server)
            .await;

        let client = ClientBuilder::builder()
            .max_redirects(0_usize)
            .build()
            .client()
            .unwrap();

        let res = client.check(redirect_uri.clone()).await.unwrap();
        assert!(res.status().is_error());

        let client = ClientBuilder::builder()
            .max_redirects(1_usize)
            .build()
            .client()
            .unwrap();

        let res = client.check(redirect_uri).await.unwrap();
        assert!(res.status().is_success());
    }

    /// A server that redirects to itself yields an error when no redirects
    /// are allowed.
    #[tokio::test]
    async fn test_limit_max_redirects() {
        let mock_server = wiremock::MockServer::start().await;

        // Set up permanent redirect loop
        let template = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
            .insert_header("Location", mock_server.uri().as_str());
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .respond_with(template)
            .mount(&mock_server)
            .await;

        let client = ClientBuilder::builder()
            .max_redirects(0_usize)
            .build()
            .client()
            .unwrap();

        let res = client.check(mock_server.uri()).await.unwrap();
        assert!(res.status().is_error());
    }

    /// URIs with `ftp`, `gopher` and `slack` schemes are reported as
    /// unsupported.
    #[tokio::test]
    async fn test_unsupported_scheme() {
        let examples = vec![
            "ftp://example.com",
            "gopher://example.com",
            "slack://example.com",
        ];

        for example in examples {
            let client = ClientBuilder::builder().build().client().unwrap();
            let res = client.check(example).await.unwrap();
            assert!(res.status().is_unsupported());
        }
    }

    /// A plugin chain handler that returns `Done` determines the final
    /// status of the check.
    #[tokio::test]
    async fn test_chain() {
        use reqwest::Request;

        #[derive(Debug)]
        struct ExampleHandler();

        // Handler which ends the chain immediately with `Status::Excluded`.
        #[async_trait]
        impl Handler<Request, Status> for ExampleHandler {
            async fn handle(&mut self, _: Request) -> ChainResult<Request, Status> {
                ChainResult::Done(Status::Excluded)
            }
        }

        let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]);

        let client = ClientBuilder::builder()
            .plugin_request_chain(chain)
            .build()
            .client()
            .unwrap();

        let result = client.check("http://example.com");
        let res = result.await.unwrap();
        assert_eq!(res.status(), &Status::Excluded);
    }
}