Skip to main content

lychee_lib/
client.rs

1//! Handler of link checking operations.
2//!
3//! This module defines two structs, [`Client`] and [`ClientBuilder`].
4//! `Client` handles incoming requests and returns responses.
5//! `ClientBuilder` exposes a finer level of granularity for building
6//! a `Client`.
7//!
8//! For convenience, a free function [`check`] is provided for ad-hoc
9//! link checks.
10#![allow(
11    clippy::module_name_repetitions,
12    clippy::struct_excessive_bools,
13    clippy::default_trait_access,
14    clippy::used_underscore_binding
15)]
16use crate::remap::Remap;
17use std::{collections::HashSet, sync::Arc, time::Duration};
18
19use http::{
20    StatusCode,
21    header::{HeaderMap, HeaderValue},
22};
23use log::debug;
24use octocrab::Octocrab;
25use regex::RegexSet;
26use reqwest::{header, redirect, tls};
27use reqwest_cookie_store::CookieStoreMutex;
28use secrecy::{ExposeSecret, SecretString};
29use typed_builder::TypedBuilder;
30
31use crate::{
32    BaseInfo, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
33    chain::RequestChain,
34    checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
35    filter::Filter,
36    ratelimit::{ClientMap, HostConfigs, HostKey, HostPool, RateLimitConfig},
37    remap::Remaps,
38    types::{DEFAULT_ACCEPTED_STATUS_CODES, Redirects, redirect_history::RedirectHistory},
39};
40
/// Default number of redirects that are followed, 10.
pub const DEFAULT_MAX_REDIRECTS: usize = 10;
/// Default number of retries before a request is deemed as failed, 3.
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default wait time in seconds between retries, 1.
pub const DEFAULT_RETRY_WAIT_TIME_SECS: u64 = 1;
/// Default timeout in seconds before a request is deemed as failed, 20.
pub const DEFAULT_TIMEOUT_SECS: u64 = 20;
/// Default user agent, `lychee/<PKG_VERSION>`.
///
/// `<PKG_VERSION>` is the value of `CARGO_PKG_VERSION` at compile time.
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));

// Constants currently not configurable by the user.
/// A timeout for only the connect phase of a [`Client`].
///
/// Handed to `reqwest::ClientBuilder::connect_timeout` when the HTTP client
/// is assembled.
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
/// TCP keepalive duration, handed to `reqwest::ClientBuilder::tcp_keepalive`.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: Duration = Duration::from_secs(60);
60
/// Selects the kinds of URL fragments that are verified on supported links.
///
/// Both switches are off in the [`Default`] value.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FragmentCheckerOptions {
    /// Verify anchor fragments such as `#section`.
    pub check_anchor_fragments: bool,
    /// Verify text fragments such as `#:~:text=example`.
    pub check_text_fragments: bool,
}

impl FragmentCheckerOptions {
    /// Reports whether at least one kind of fragment checking is switched on.
    #[must_use]
    pub const fn any_enabled(self) -> bool {
        // Only the "everything off" combination counts as disabled.
        !matches!(
            self,
            Self {
                check_anchor_fragments: false,
                check_text_fragments: false,
            }
        )
    }
}
77
78/// Builder for [`Client`].
79///
80/// See crate-level documentation for usage example.
81#[derive(TypedBuilder, Debug, Clone)]
82#[builder(field_defaults(default, setter(into)))]
83pub struct ClientBuilder {
84    /// Optional GitHub token used for GitHub links.
85    ///
86    /// This allows much more request before getting rate-limited.
87    ///
88    /// # Rate-limiting Defaults
89    ///
90    /// As of Feb 2022, it's 60 per hour without GitHub token v.s.
91    /// 5000 per hour with token.
92    github_token: Option<SecretString>,
93
94    /// Remap URIs matching a pattern to a different URI.
95    ///
96    /// This makes it possible to remap any HTTP/HTTPS endpoint to a different
97    /// HTTP/HTTPS one. This feature could also be used to proxy
98    /// certain requests.
99    ///
100    /// # Usage Notes
101    ///
102    /// Use with caution because a large set of remap rules may cause
103    /// performance issues.
104    ///
105    /// Furthermore rules are executed sequentially and multiple mappings for
106    /// the same URI are allowed, so it is up to the library user's discretion to
107    /// make sure rules don't conflict with each other.
108    remaps: Option<Remaps>,
109
110    /// Automatically append file extensions to `file://` URIs as needed
111    ///
112    /// This option takes effect on `file://` URIs which do not exist.
113    fallback_extensions: Vec<String>,
114
115    /// Index file names to use when resolving `file://` URIs which point to
116    /// directories.
117    ///
118    /// For local directory links, if this is non-`None`, then at least one
119    /// index file from this list must exist in order for the link to be
120    /// considered valid. Index files names are required to match regular
121    /// files, aside from the special `.` name which will match the
122    /// directory itself.
123    ///
124    /// If `None`, index file checking is disabled and directory links are valid
125    /// as long as the directory exists on disk.
126    ///
127    /// In the [`ClientBuilder`], this defaults to `None`.
128    #[builder(default = None)]
129    index_files: Option<Vec<String>>,
130
131    /// Links matching this set of regular expressions are **always** checked.
132    ///
133    /// This has higher precedence over [`ClientBuilder::excludes`], **but**
134    /// has lower precedence compared to any other `exclude_` fields or
135    /// [`ClientBuilder::schemes`] below.
136    includes: Option<RegexSet>,
137
138    /// Links matching this set of regular expressions are ignored, **except**
139    /// when a link also matches against [`ClientBuilder::includes`].
140    excludes: Option<RegexSet>,
141
142    /// When `true`, exclude all private network addresses.
143    ///
144    /// This effectively turns on the following fields:
145    /// - [`ClientBuilder::exclude_private_ips`]
146    /// - [`ClientBuilder::exclude_link_local_ips`]
147    /// - [`ClientBuilder::exclude_loopback_ips`]
148    exclude_all_private: bool,
149
150    /// When `true`, exclude private IP addresses.
151    ///
152    /// # IPv4
153    ///
154    /// The private address ranges are defined in [IETF RFC 1918] and include:
155    ///
156    ///  - `10.0.0.0/8`
157    ///  - `172.16.0.0/12`
158    ///  - `192.168.0.0/16`
159    ///
160    /// # IPv6
161    ///
162    /// The address is a unique local address (`fc00::/7`).
163    ///
164    /// This property is defined in [IETF RFC 4193].
165    ///
166    /// # Note
167    ///
168    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully
169    /// deprecated in [IETF RFC 3879]. So it is **NOT** considered as private on
170    /// this purpose.
171    ///
172    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
173    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
174    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
175    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
176    exclude_private_ips: bool,
177
178    /// When `true`, exclude link-local IPs.
179    ///
180    /// # IPv4
181    ///
182    /// The address is `169.254.0.0/16`.
183    ///
184    /// This property is defined by [IETF RFC 3927].
185    ///
186    /// # IPv6
187    ///
188    /// The address is a unicast address with link-local scope,  as defined in
189    /// [RFC 4291].
190    ///
191    /// A unicast address has link-local scope if it has the prefix `fe80::/10`,
192    /// as per [RFC 4291 section 2.4].
193    ///
194    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
195    /// [RFC 4291]: https://tools.ietf.org/html/rfc4291
196    /// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
197    exclude_link_local_ips: bool,
198
199    /// When `true`, exclude loopback IP addresses.
200    ///
201    /// # IPv4
202    ///
203    /// This is a loopback address (`127.0.0.0/8`).
204    ///
205    /// This property is defined by [IETF RFC 1122].
206    ///
207    /// # IPv6
208    ///
209    /// This is the loopback address (`::1`), as defined in
210    /// [IETF RFC 4291 section 2.5.3].
211    ///
212    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
213    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
214    exclude_loopback_ips: bool,
215
216    /// When `true`, check mail addresses.
217    include_mail: bool,
218
219    /// Maximum number of redirects per request before returning an error.
220    ///
221    /// Defaults to [`DEFAULT_MAX_REDIRECTS`].
222    #[builder(default = DEFAULT_MAX_REDIRECTS)]
223    max_redirects: usize,
224
225    /// Maximum number of retries per request before returning an error.
226    ///
227    /// Defaults to [`DEFAULT_MAX_RETRIES`].
228    #[builder(default = DEFAULT_MAX_RETRIES)]
229    max_retries: u64,
230
231    /// Minimum accepted TLS version.
232    min_tls_version: Option<tls::Version>,
233
234    /// User-agent used for checking links.
235    ///
236    /// Defaults to [`DEFAULT_USER_AGENT`].
237    ///
238    /// # Notes
239    ///
240    /// This may be helpful for bypassing certain firewalls.
241    // Faking the user agent is necessary for some websites, unfortunately.
242    // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
243    #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
244    user_agent: String,
245
246    /// When `true`, accept invalid SSL certificates.
247    ///
248    /// # Warning
249    ///
250    /// You should think very carefully before allowing invalid SSL
251    /// certificates. It will accept any certificate for any site to be trusted
252    /// including expired certificates. This introduces significant
253    /// vulnerabilities, and should only be used as a last resort.
254    // TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
255    allow_insecure: bool,
256
257    /// Set of accepted URL schemes.
258    ///
259    /// Only links with matched URI schemes are checked. This has no effect when
260    /// it's empty.
261    schemes: HashSet<String>,
262
263    /// Default [headers] for every request.
264    ///
265    /// This allows working around validation issues on some websites. See also
266    /// [here] for usage examples.
267    ///
268    /// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
269    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
270    custom_headers: HeaderMap,
271
272    /// HTTP method used for requests, e.g. `GET` or `HEAD`.
273    #[builder(default = reqwest::Method::GET)]
274    method: reqwest::Method,
275
276    /// Set of accepted return codes / status codes.
277    ///
278    /// Unmatched return codes/ status codes are deemed as errors.
279    #[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
280    accepted: HashSet<StatusCode>,
281
282    /// Response timeout per request.
283    timeout: Option<Duration>,
284
285    /// Base for resolving paths.
286    ///
287    /// E.g. if the base is `/home/user/` and the path is `file.txt`, the
288    /// resolved path would be `/home/user/file.txt`.
289    base: BaseInfo,
290
291    /// Initial time between retries of failed requests.
292    ///
293    /// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
294    ///
295    /// # Notes
296    ///
297    /// For each request, the wait time increases using an exponential backoff
298    /// mechanism. For example, if the value is 1 second, then it waits for
299    /// 2 ^ (N-1) seconds before the N-th retry.
300    ///
301    /// This prevents spending too much system resources on slow responders and
302    /// prioritizes other requests.
303    #[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
304    retry_wait_time: Duration,
305
306    /// When `true`, requires using HTTPS when it's available.
307    ///
308    /// This would treat unencrypted links as errors when HTTPS is available.
309    /// It has no effect on non-HTTP schemes or if the URL doesn't support
310    /// HTTPS.
311    require_https: bool,
312
313    /// Cookie store used for requests.
314    ///
315    /// See <https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store>
316    cookie_jar: Option<Arc<CookieStoreMutex>>,
317
318    /// Controls which fragment types are checked in links.
319    fragment_checker_options: FragmentCheckerOptions,
320
321    /// Enable the checking of wikilinks in markdown files.
322    /// Note that base must not be `None` if you set this `true`.
323    include_wikilinks: bool,
324
325    /// Requests run through this chain where each item in the chain
326    /// can modify the request. A chained item can also decide to exit
327    /// early and return a status, so that subsequent chain items are
328    /// skipped and the lychee-internal request chain is not activated.
329    plugin_request_chain: RequestChain,
330
331    /// Global rate limiting configuration that applies as defaults to all hosts
332    rate_limit_config: RateLimitConfig,
333
334    /// Per-host configuration overrides
335    hosts: HostConfigs,
336}
337
338impl Default for ClientBuilder {
339    #[inline]
340    fn default() -> Self {
341        Self::builder().build()
342    }
343}
344
345impl ClientBuilder {
346    /// Instantiates a [`Client`].
347    ///
348    /// # Errors
349    ///
350    /// Returns an `Err` if:
351    /// - The user-agent contains characters other than ASCII 32-127.
352    /// - The reqwest client cannot be instantiated. This occurs if a TLS
353    ///   backend cannot be initialized or the resolver fails to load the system
354    ///   configuration. See [here].
355    /// - The GitHub client cannot be created. Since the implementation also
356    ///   uses reqwest under the hood, this errors in the same circumstances as
357    ///   the last one.
358    ///
359    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
360    pub fn client(self) -> Result<Client> {
361        let redirect_history = RedirectHistory::new();
362        let reqwest_client = self
363            .build_client(redirect_history.clone())?
364            .build()
365            .map_err(ErrorKind::BuildRequestClient)?;
366
367        let client_map = self.build_host_clients(&redirect_history)?;
368
369        let host_pool = HostPool::new(
370            self.rate_limit_config,
371            self.hosts,
372            reqwest_client,
373            client_map,
374        );
375
376        let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
377            Some(token) if !token.is_empty() => Some(
378                Octocrab::builder()
379                    .personal_token(token.to_string())
380                    .build()
381                    // this is essentially the same `reqwest::ClientBuilder::build` error
382                    // see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
383                    .map_err(|e: octocrab::Error| ErrorKind::BuildGithubClient(Box::new(e)))?,
384            ),
385            _ => None,
386        };
387
388        let filter = Filter {
389            includes: self.includes.map(Into::into),
390            excludes: self.excludes.map(Into::into),
391            schemes: self.schemes,
392            // exclude_all_private option turns on all "private" excludes,
393            // including private IPs, link-local IPs and loopback IPs
394            exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
395            exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
396            exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
397            include_mail: self.include_mail,
398        };
399
400        let website_checker = WebsiteChecker::new(
401            self.method,
402            self.retry_wait_time,
403            redirect_history.clone(),
404            self.max_retries,
405            self.accepted,
406            github_client,
407            self.require_https,
408            self.plugin_request_chain,
409            self.fragment_checker_options,
410            Arc::new(host_pool),
411        );
412
413        Ok(Client {
414            remaps: self.remaps,
415            filter,
416            email_checker: MailChecker::new(self.timeout),
417            website_checker,
418            file_checker: FileChecker::new(
419                &self.base,
420                self.fallback_extensions,
421                self.index_files,
422                self.fragment_checker_options,
423                self.include_wikilinks,
424            )?,
425        })
426    }
427
428    /// Build the host-specific clients with their host-specific headers
429    fn build_host_clients(&self, redirect_history: &RedirectHistory) -> Result<ClientMap> {
430        self.hosts
431            .iter()
432            .map(|(host, config)| {
433                let mut headers = self.default_headers()?;
434                headers.extend(config.headers.clone());
435                let client = self
436                    .build_client(redirect_history.clone())?
437                    .default_headers(headers)
438                    .build()
439                    .map_err(ErrorKind::BuildRequestClient)?;
440                Ok((HostKey::from(host.as_str()), client))
441            })
442            .collect()
443    }
444
445    /// Create a [`reqwest::ClientBuilder`] based on various fields
446    fn build_client(&self, redirect_history: RedirectHistory) -> Result<reqwest::ClientBuilder> {
447        let mut builder = reqwest::ClientBuilder::new()
448            .gzip(true)
449            .default_headers(self.default_headers()?)
450            .danger_accept_invalid_certs(self.allow_insecure)
451            .connect_timeout(CONNECT_TIMEOUT)
452            .tcp_keepalive(TCP_KEEPALIVE)
453            .redirect(redirect_policy(redirect_history, self.max_redirects));
454
455        if let Some(cookie_jar) = self.cookie_jar.clone() {
456            builder = builder.cookie_provider(cookie_jar);
457        }
458
459        if let Some(min_tls) = self.min_tls_version {
460            builder = builder.min_tls_version(min_tls);
461        }
462
463        if let Some(timeout) = self.timeout {
464            builder = builder.timeout(timeout);
465        }
466
467        Ok(builder)
468    }
469
470    fn default_headers(&self) -> Result<HeaderMap> {
471        let user_agent = self.user_agent.clone();
472        let mut headers = self.custom_headers.clone();
473
474        if let Some(prev_user_agent) =
475            headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
476        {
477            debug!(
478                "Found user-agent in headers: {}. Overriding it with {user_agent}.",
479                prev_user_agent.to_str().unwrap_or("�"),
480            );
481        }
482
483        headers.insert(
484            header::TRANSFER_ENCODING,
485            HeaderValue::from_static("chunked"),
486        );
487
488        Ok(headers)
489    }
490}
491
492/// Create our custom [`redirect::Policy`] in order to stop following redirects
493/// once `max_redirects` is reached and to record redirections for reporting.
494fn redirect_policy(redirect_history: RedirectHistory, max_redirects: usize) -> redirect::Policy {
495    redirect::Policy::custom(move |attempt| {
496        if attempt.previous().len() > max_redirects {
497            attempt.stop()
498        } else {
499            redirect_history.record_redirects(&attempt);
500            debug!("Following redirect to {}", attempt.url());
501            attempt.follow()
502        }
503    })
504}
505
/// Handles incoming requests and returns responses.
///
/// A `Client` bundles the remap rules, the link filter and one checker per
/// kind of URI (website, file, mail). It is created through
/// [`ClientBuilder::client`].
///
/// See [`ClientBuilder`] which contains sane defaults for all configuration
/// options.
#[derive(Debug, Clone)]
pub struct Client {
    /// Optional remap rules for URIs matching pattern.
    ///
    /// `None` means that no remapping takes place.
    remaps: Option<Remaps>,

    /// Rules to decide whether a given link should be checked or ignored.
    filter: Filter,

    /// A checker for website URLs.
    ///
    /// This is the fallback for every URI that is neither excluded nor a
    /// file or mail URI.
    website_checker: WebsiteChecker,

    /// A checker for file URLs.
    file_checker: FileChecker,

    /// A checker for email (`mailto`) URLs.
    email_checker: MailChecker,
}
527
528impl Client {
529    /// Get `HostPool`
530    #[must_use]
531    pub fn host_pool(&self) -> Arc<HostPool> {
532        self.website_checker.host_pool()
533    }
534
535    /// Check a single request.
536    ///
537    /// `request` can be either a [`Request`] or a type that can be converted
538    /// into it. In any case, it must represent a valid URI.
539    ///
540    /// # Errors
541    ///
542    /// Returns an `Err` if:
543    /// - `request` does not represent a valid URI.
544    /// - Encrypted connection for a HTTP URL is available but unused. (Only
545    ///   checked when `Client::require_https` is `true`.)
546    #[allow(clippy::missing_panics_doc)]
547    pub async fn check<T, E>(&self, request: T) -> Result<Response>
548    where
549        Request: TryFrom<T, Error = E>,
550        ErrorKind: From<E>,
551    {
552        let Request {
553            mut uri,
554            credentials,
555            source,
556            span,
557            ..
558        } = request.try_into()?;
559
560        let start = std::time::Instant::now(); // Measure check time
561        let remap = self.remap(&mut uri)?.inspect(|r| debug!("Remapping {r}"));
562
563        let (status, redirects) = match uri.scheme() {
564            _ if self.is_excluded(&uri) => (Status::Excluded, None),
565            _ if uri.is_tel() => (Status::Excluded, None), // We don't check tel: URIs
566            _ if uri.is_file() => (self.check_file(&uri).await, None),
567            _ if uri.is_mail() => (self.check_mail(&uri).await, None),
568            _ => self.check_website(&uri, credentials).await,
569        };
570
571        Ok(Response::new(
572            uri,
573            status,
574            redirects,
575            remap,
576            source.into(),
577            span,
578            Some(start.elapsed()),
579        ))
580    }
581
582    /// Check a single file using the file checker.
583    pub async fn check_file(&self, uri: &Uri) -> Status {
584        self.file_checker.check(uri).await
585    }
586
587    /// Remap [`Uri`] as a side-effect, using the client-defined remap rules.
588    /// Return `Some` only if a remap was performed.
589    ///
590    /// # Errors
591    ///
592    /// Returns an `Err` if the remapped `uri` is not a valid URI.
593    pub fn remap(&self, uri: &mut Uri) -> Result<Option<Remap>> {
594        match self.remaps {
595            Some(ref remaps) => {
596                let remapped = remaps.remap(uri)?;
597                if let Some(remapped) = &remapped {
598                    *uri = remapped.new.clone();
599                }
600
601                Ok(remapped)
602            }
603            None => Ok(None),
604        }
605    }
606
607    /// Returns whether the given `uri` should be ignored from checking.
608    #[must_use]
609    pub fn is_excluded(&self, uri: &Uri) -> bool {
610        self.filter.is_excluded(uri)
611    }
612
613    /// Checks the given URI of a website.
614    ///
615    /// # Errors
616    ///
617    /// This returns an `Err` if
618    /// - The URI is invalid.
619    /// - The request failed.
620    /// - The response status code is not accepted.
621    /// - The URI cannot be converted to HTTPS.
622    pub async fn check_website(
623        &self,
624        uri: &Uri,
625        credentials: Option<BasicAuthCredentials>,
626    ) -> (Status, Option<Redirects>) {
627        self.website_checker.check_website(uri, credentials).await
628    }
629
630    /// Checks a `mailto` URI.
631    pub async fn check_mail(&self, uri: &Uri) -> Status {
632        self.email_checker.check_mail(uri).await
633    }
634}
635
636/// A shorthand function to check a single URI.
637///
638/// This provides the simplest link check utility without having to create a
639/// [`Client`]. For more complex scenarios, see documentation of
640/// [`ClientBuilder`] instead.
641///
642/// # Errors
643///
644/// Returns an `Err` if:
645/// - The request client cannot be built (see [`ClientBuilder::client`] for
646///   failure cases).
647/// - The request cannot be checked (see [`Client::check`] for failure cases).
648pub async fn check<T, E>(request: T) -> Result<Response>
649where
650    Request: TryFrom<T, Error = E>,
651    ErrorKind: From<E>,
652{
653    let client = ClientBuilder::builder().build().client()?;
654    client.check(request).await
655}
656
657#[cfg(test)]
658mod tests {
659    use std::{
660        fs::File,
661        time::{Duration, Instant},
662    };
663
664    use async_trait::async_trait;
665    use http::{StatusCode, header::HeaderMap};
666    use reqwest::header;
667    use tempfile::tempdir;
668    use test_utils::get_mock_client_response;
669    use test_utils::mock_server;
670    use test_utils::redirecting_mock_server;
671    use wiremock::{
672        Mock,
673        matchers::{method, path},
674    };
675
676    use super::ClientBuilder;
677    use crate::{
678        ErrorKind, Redirect, Redirects, Request, Status, Uri,
679        chain::{ChainResult, Handler, RequestChain},
680        remap::{Remap, Remaps},
681    };
682
683    #[tokio::test]
684    async fn test_nonexistent() {
685        let mock_server = mock_server!(StatusCode::NOT_FOUND);
686        let res = get_mock_client_response!(mock_server.uri()).await;
687
688        assert!(res.status().is_error());
689    }
690
691    #[tokio::test]
692    async fn test_nonexistent_with_path() {
693        let res = get_mock_client_response!("http://127.0.0.1/invalid").await;
694        assert!(res.status().is_error());
695    }
696
697    #[tokio::test]
698    async fn test_github() {
699        let res = get_mock_client_response!("https://github.com/lycheeverse/lychee").await;
700        assert!(res.status().is_success());
701    }
702
703    #[tokio::test]
704    async fn test_github_nonexistent_repo() {
705        let res = get_mock_client_response!("https://github.com/lycheeverse/not-lychee").await;
706        assert!(res.status().is_error());
707    }
708
709    #[tokio::test]
710    async fn test_github_nonexistent_file() {
711        let res = get_mock_client_response!(
712            "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
713        )
714        .await;
715        assert!(res.status().is_error());
716    }
717
718    #[tokio::test]
719    async fn test_youtube() {
720        // This is applying a quirk. See the quirks module.
721        let res = get_mock_client_response!("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
722        assert!(res.status().is_success());
723
724        let res = get_mock_client_response!("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
725        assert!(res.status().is_error());
726    }
727
728    #[tokio::test]
729    async fn test_basic_auth() {
730        let mut r: Request = "https://authenticationtest.com/HTTPAuth/"
731            .try_into()
732            .unwrap();
733
734        let res = get_mock_client_response!(r.clone()).await;
735        assert_eq!(res.status().code(), Some(401.try_into().unwrap()));
736
737        r.credentials = Some(crate::BasicAuthCredentials {
738            username: "user".into(),
739            password: "pass".into(),
740        });
741
742        let res = get_mock_client_response!(r).await;
743        assert!(res.status().is_success());
744    }
745
746    #[tokio::test]
747    async fn test_non_github() {
748        let mock_server = mock_server!(StatusCode::OK);
749        let res = get_mock_client_response!(mock_server.uri()).await;
750
751        assert!(res.status().is_success());
752    }
753
754    #[tokio::test]
755    async fn test_invalid_ssl() {
756        let res = get_mock_client_response!("https://expired.badssl.com/").await;
757
758        assert!(res.status().is_error());
759
760        // Same, but ignore certificate error
761        let res = ClientBuilder::builder()
762            .allow_insecure(true)
763            .build()
764            .client()
765            .unwrap()
766            .check("https://expired.badssl.com/")
767            .await
768            .unwrap();
769        assert!(res.status().is_success());
770    }
771
772    #[tokio::test]
773    async fn test_file() {
774        let dir = tempdir().unwrap();
775        let file = dir.path().join("temp");
776        File::create(file).unwrap();
777        let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap());
778
779        let res = get_mock_client_response!(uri).await;
780        assert!(res.status().is_success());
781    }
782
783    #[tokio::test]
784    async fn test_custom_headers() {
785        // See https://github.com/rust-lang/crates.io/issues/788
786        let mut custom = HeaderMap::new();
787        custom.insert(header::ACCEPT, "text/html".parse().unwrap());
788        let res = ClientBuilder::builder()
789            .custom_headers(custom)
790            .build()
791            .client()
792            .unwrap()
793            .check("https://crates.io/crates/lychee")
794            .await
795            .unwrap();
796        assert!(res.status().is_success());
797    }
798
799    #[tokio::test]
800    async fn test_exclude_mail_by_default() {
801        let client = ClientBuilder::builder()
802            .exclude_all_private(true)
803            .build()
804            .client()
805            .unwrap();
806        assert!(client.is_excluded(&Uri {
807            url: "mailto://mail@example.com".try_into().unwrap()
808        }));
809    }
810
811    #[tokio::test]
812    async fn test_include_mail() {
813        let client = ClientBuilder::builder()
814            .include_mail(false)
815            .exclude_all_private(true)
816            .build()
817            .client()
818            .unwrap();
819        assert!(client.is_excluded(&Uri {
820            url: "mailto://mail@example.com".try_into().unwrap()
821        }));
822
823        let client = ClientBuilder::builder()
824            .include_mail(true)
825            .exclude_all_private(true)
826            .build()
827            .client()
828            .unwrap();
829        assert!(!client.is_excluded(&Uri {
830            url: "mailto://mail@example.com".try_into().unwrap()
831        }));
832    }
833
834    #[tokio::test]
835    async fn test_include_tel() {
836        let client = ClientBuilder::builder().build().client().unwrap();
837        assert!(client.is_excluded(&Uri {
838            url: "tel:1234567890".try_into().unwrap()
839        }));
840    }
841
842    #[tokio::test]
843    async fn test_require_https() {
844        let client = ClientBuilder::builder().build().client().unwrap();
845        let res = client.check("http://rust-lang.org/").await.unwrap();
846        assert!(res.status().is_success());
847
848        // Same request will fail if HTTPS is required
849        let client = ClientBuilder::builder()
850            .require_https(true)
851            .build()
852            .client()
853            .unwrap();
854        let res = client.check("http://rust-lang.org/").await.unwrap();
855        assert!(res.status().is_error());
856    }
857
858    #[tokio::test]
859    async fn test_timeout() {
860        // Note: this checks response timeout, not connect timeout.
861        // To check connect timeout, we'd have to do something more involved,
862        // see: https://github.com/LukeMathWalker/wiremock-rs/issues/19
863        let mock_delay = Duration::from_millis(20);
864        let checker_timeout = Duration::from_millis(10);
865        assert!(mock_delay > checker_timeout);
866
867        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));
868
869        let client = ClientBuilder::builder()
870            .timeout(checker_timeout)
871            .max_retries(0u64)
872            .build()
873            .client()
874            .unwrap();
875
876        let res = client.check(mock_server.uri()).await.unwrap();
877        assert!(res.status().is_timeout());
878    }
879
880    #[tokio::test]
881    async fn test_exponential_backoff() {
882        let mock_delay = Duration::from_millis(20);
883        let checker_timeout = Duration::from_millis(10);
884        assert!(mock_delay > checker_timeout);
885
886        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));
887
888        // Perform a warm-up request to ensure the lazy regexes
889        // in lychee-lib/src/quirks/mod.rs are compiled.
890        // On some platforms, this can take some time(approx. 110ms),
891        // which should not be counted in the test.
892        let warm_up_client = ClientBuilder::builder()
893            .max_retries(0_u64)
894            .build()
895            .client()
896            .unwrap();
897        let _res = warm_up_client.check(mock_server.uri()).await.unwrap();
898
899        let client = ClientBuilder::builder()
900            .timeout(checker_timeout)
901            .max_retries(3_u64)
902            .retry_wait_time(Duration::from_millis(50))
903            .build()
904            .client()
905            .unwrap();
906
907        // Summary:
908        // 1. First request fails with timeout (after 10ms)
909        // 2. Retry after 50ms (total 60ms)
910        // 3. Second request fails with timeout (after 10ms)
911        // 4. Retry after 100ms (total 160ms)
912        // 5. Third request fails with timeout (after 10ms)
913        // 6. Retry after 200ms (total 360ms)
914        // Total: 360ms
915
916        let start = Instant::now();
917        let res = client.check(mock_server.uri()).await.unwrap();
918        let end = start.elapsed();
919
920        assert!(res.status().is_error());
921
922        // on slow connections, this might take a bit longer than nominal
923        // backed-off timeout (7 secs)
924        assert!((350..=550).contains(&end.as_millis()));
925    }
926
927    #[tokio::test]
928    async fn test_avoid_reqwest_panic() {
929        let client = ClientBuilder::builder().build().client().unwrap();
930        // This request will result in an Unsupported status, but it won't panic
931        let res = client.check("http://\"").await.unwrap();
932
933        assert!(matches!(
934            res.status(),
935            Status::Unsupported(ErrorKind::BuildRequestClient(_))
936        ));
937        assert!(res.status().is_unsupported());
938    }
939
940    #[tokio::test]
941    async fn test_max_redirects() {
942        let mock_server = wiremock::MockServer::start().await;
943
944        let redirect_uri = format!("{}/redirect", &mock_server.uri());
945        let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
946            .insert_header("Location", redirect_uri.as_str());
947
948        let redirect_count = 15usize;
949        let initial_invocation = 1;
950
951        // Set up infinite redirect loop
952        Mock::given(method("GET"))
953            .and(path("/redirect"))
954            .respond_with(move |_: &_| redirect.clone())
955            .expect(initial_invocation + redirect_count as u64)
956            .mount(&mock_server)
957            .await;
958
959        let res = ClientBuilder::builder()
960            .max_redirects(redirect_count)
961            .build()
962            .client()
963            .unwrap()
964            .check(redirect_uri.clone())
965            .await
966            .unwrap();
967
968        assert!(matches!(
969            res.status(),
970            Status::Error(ErrorKind::RejectedStatusCode(
971                StatusCode::PERMANENT_REDIRECT
972            ))
973        ));
974        assert!(matches!(
975            res.redirects(),
976            Some(redirects) if redirects.count() == redirect_count,
977        ));
978    }
979
980    #[tokio::test]
981    async fn test_redirects() {
982        redirecting_mock_server!(async |redirect_url: Url, ok_url| {
983            let res = ClientBuilder::builder()
984                .max_redirects(1_usize)
985                .build()
986                .client()
987                .unwrap()
988                .check(Uri::from((redirect_url).clone()))
989                .await
990                .unwrap();
991
992            let mut redirects = Redirects::new(redirect_url);
993            redirects.push(Redirect {
994                url: ok_url,
995                code: StatusCode::PERMANENT_REDIRECT,
996            });
997
998            assert_eq!(res.status(), &Status::Ok(StatusCode::OK));
999            assert_eq!(res.redirects(), Some(&redirects));
1000        })
1001        .await;
1002    }
1003
1004    #[tokio::test]
1005    async fn test_remaps() {
1006        let mapped = String::from("file:///nope");
1007        let client = ClientBuilder::builder()
1008            .remaps(Remaps::new(vec![(
1009                regex::Regex::new("http://example.org").unwrap(),
1010                mapped.clone(),
1011            )]))
1012            .build()
1013            .client()
1014            .unwrap();
1015
1016        let input = Uri::try_from("http://example.org").unwrap();
1017        let res = client.check(input.clone()).await.unwrap();
1018
1019        assert_eq!(
1020            res.status(),
1021            &Status::Error(ErrorKind::InvalidFilePath(
1022                format!("{mapped}/").try_into().unwrap(),
1023            ))
1024        );
1025        assert_eq!(
1026            res.remap(),
1027            Some(&Remap {
1028                original: input,
1029                new: format!("{mapped}/").try_into().unwrap(),
1030            })
1031        );
1032    }
1033
1034    #[tokio::test]
1035    async fn test_unsupported_scheme() {
1036        let examples = vec![
1037            "ftp://example.com",
1038            "gopher://example.com",
1039            "slack://example.com",
1040        ];
1041
1042        for example in examples {
1043            let client = ClientBuilder::builder().build().client().unwrap();
1044            let res = client.check(example).await.unwrap();
1045            assert!(res.status().is_unsupported());
1046        }
1047    }
1048
1049    #[tokio::test]
1050    async fn test_chain() {
1051        use reqwest::Request;
1052
1053        #[derive(Debug)]
1054        struct ExampleHandler();
1055
1056        #[async_trait]
1057        impl Handler<Request, Status> for ExampleHandler {
1058            async fn handle(&mut self, _: Request) -> ChainResult<Request, Status> {
1059                ChainResult::Done(Status::Excluded)
1060            }
1061        }
1062
1063        let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]);
1064
1065        let client = ClientBuilder::builder()
1066            .plugin_request_chain(chain)
1067            .build()
1068            .client()
1069            .unwrap();
1070
1071        let result = client.check("http://example.com");
1072        let res = result.await.unwrap();
1073        assert_eq!(res.status(), &Status::Excluded);
1074    }
1075}