Skip to main content

lychee_lib/
client.rs

1//! Handler of link checking operations.
2//!
3//! This module defines two structs, [`Client`] and [`ClientBuilder`].
4//! `Client` handles incoming requests and returns responses.
5//! `ClientBuilder` exposes a finer level of granularity for building
6//! a `Client`.
7//!
8//! For convenience, a free function [`check`] is provided for ad-hoc
9//! link checks.
10#![allow(
11    clippy::module_name_repetitions,
12    clippy::struct_excessive_bools,
13    clippy::default_trait_access,
14    clippy::used_underscore_binding
15)]
16use std::{collections::HashSet, sync::Arc, time::Duration};
17
18use http::{
19    StatusCode,
20    header::{HeaderMap, HeaderValue},
21};
22use log::debug;
23use octocrab::Octocrab;
24use regex::RegexSet;
25use reqwest::{header, redirect, tls};
26use reqwest_cookie_store::CookieStoreMutex;
27use secrecy::{ExposeSecret, SecretString};
28use typed_builder::TypedBuilder;
29
30use crate::{
31    Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
32    chain::RequestChain,
33    checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
34    filter::Filter,
35    ratelimit::{ClientMap, HostConfigs, HostKey, HostPool, RateLimitConfig},
36    remap::Remaps,
37    types::{DEFAULT_ACCEPTED_STATUS_CODES, redirect_history::RedirectHistory},
38};
39
/// Number of redirects followed by default before a request is deemed failed (5).
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Number of retries attempted by default before a request is deemed failed (3).
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default wait time between retries, in seconds (1).
pub const DEFAULT_RETRY_WAIT_TIME_SECS: usize = 1;
/// Default timeout, in seconds, before a request is deemed failed (20).
pub const DEFAULT_TIMEOUT_SECS: usize = 20;
48/// Default user agent, `lychee-<PKG_VERSION>`.
49pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
50
// The constants below cannot currently be configured by the user.
/// Timeout that applies only to the connect phase of a [`Client`].
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
/// TCP keepalive duration.
///
/// Background on TCP keepalive can be found at
/// <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html>.
const TCP_KEEPALIVE: Duration = Duration::from_secs(60);
59
60/// Builder for [`Client`].
61///
62/// See crate-level documentation for usage example.
63#[derive(TypedBuilder, Debug, Clone)]
64#[builder(field_defaults(default, setter(into)))]
65pub struct ClientBuilder {
66    /// Optional GitHub token used for GitHub links.
67    ///
68    /// This allows much more request before getting rate-limited.
69    ///
70    /// # Rate-limiting Defaults
71    ///
72    /// As of Feb 2022, it's 60 per hour without GitHub token v.s.
73    /// 5000 per hour with token.
74    github_token: Option<SecretString>,
75
76    /// Remap URIs matching a pattern to a different URI.
77    ///
78    /// This makes it possible to remap any HTTP/HTTPS endpoint to a different
79    /// HTTP/HTTPS one. This feature could also be used to proxy
80    /// certain requests.
81    ///
82    /// # Usage Notes
83    ///
84    /// Use with caution because a large set of remapping rules may cause
85    /// performance issues.
86    ///
87    /// Furthermore rules are executed sequentially and multiple mappings for
88    /// the same URI are allowed, so it is up to the library user's discretion to
89    /// make sure rules don't conflict with each other.
90    remaps: Option<Remaps>,
91
92    /// Automatically append file extensions to `file://` URIs as needed
93    ///
94    /// This option takes effect on `file://` URIs which do not exist.
95    fallback_extensions: Vec<String>,
96
97    /// Index file names to use when resolving `file://` URIs which point to
98    /// directories.
99    ///
100    /// For local directory links, if this is non-`None`, then at least one
101    /// index file from this list must exist in order for the link to be
102    /// considered valid. Index files names are required to match regular
103    /// files, aside from the special `.` name which will match the
104    /// directory itself.
105    ///
106    /// If `None`, index file checking is disabled and directory links are valid
107    /// as long as the directory exists on disk.
108    ///
109    /// In the [`ClientBuilder`], this defaults to `None`.
110    #[builder(default = None)]
111    index_files: Option<Vec<String>>,
112
113    /// Links matching this set of regular expressions are **always** checked.
114    ///
115    /// This has higher precedence over [`ClientBuilder::excludes`], **but**
116    /// has lower precedence compared to any other `exclude_` fields or
117    /// [`ClientBuilder::schemes`] below.
118    includes: Option<RegexSet>,
119
120    /// Links matching this set of regular expressions are ignored, **except**
121    /// when a link also matches against [`ClientBuilder::includes`].
122    excludes: Option<RegexSet>,
123
124    /// When `true`, exclude all private network addresses.
125    ///
126    /// This effectively turns on the following fields:
127    /// - [`ClientBuilder::exclude_private_ips`]
128    /// - [`ClientBuilder::exclude_link_local_ips`]
129    /// - [`ClientBuilder::exclude_loopback_ips`]
130    exclude_all_private: bool,
131
132    /// When `true`, exclude private IP addresses.
133    ///
134    /// # IPv4
135    ///
136    /// The private address ranges are defined in [IETF RFC 1918] and include:
137    ///
138    ///  - `10.0.0.0/8`
139    ///  - `172.16.0.0/12`
140    ///  - `192.168.0.0/16`
141    ///
142    /// # IPv6
143    ///
144    /// The address is a unique local address (`fc00::/7`).
145    ///
146    /// This property is defined in [IETF RFC 4193].
147    ///
148    /// # Note
149    ///
150    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully
151    /// deprecated in [IETF RFC 3879]. So it is **NOT** considered as private on
152    /// this purpose.
153    ///
154    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
155    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
156    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
157    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
158    exclude_private_ips: bool,
159
160    /// When `true`, exclude link-local IPs.
161    ///
162    /// # IPv4
163    ///
164    /// The address is `169.254.0.0/16`.
165    ///
166    /// This property is defined by [IETF RFC 3927].
167    ///
168    /// # IPv6
169    ///
170    /// The address is a unicast address with link-local scope,  as defined in
171    /// [RFC 4291].
172    ///
173    /// A unicast address has link-local scope if it has the prefix `fe80::/10`,
174    /// as per [RFC 4291 section 2.4].
175    ///
176    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
177    /// [RFC 4291]: https://tools.ietf.org/html/rfc4291
178    /// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
179    exclude_link_local_ips: bool,
180
181    /// When `true`, exclude loopback IP addresses.
182    ///
183    /// # IPv4
184    ///
185    /// This is a loopback address (`127.0.0.0/8`).
186    ///
187    /// This property is defined by [IETF RFC 1122].
188    ///
189    /// # IPv6
190    ///
191    /// This is the loopback address (`::1`), as defined in
192    /// [IETF RFC 4291 section 2.5.3].
193    ///
194    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
195    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
196    exclude_loopback_ips: bool,
197
198    /// When `true`, check mail addresses.
199    include_mail: bool,
200
201    /// Maximum number of redirects per request before returning an error.
202    ///
203    /// Defaults to [`DEFAULT_MAX_REDIRECTS`].
204    #[builder(default = DEFAULT_MAX_REDIRECTS)]
205    max_redirects: usize,
206
207    /// Maximum number of retries per request before returning an error.
208    ///
209    /// Defaults to [`DEFAULT_MAX_RETRIES`].
210    #[builder(default = DEFAULT_MAX_RETRIES)]
211    max_retries: u64,
212
213    /// Minimum accepted TLS version.
214    min_tls_version: Option<tls::Version>,
215
216    /// User-agent used for checking links.
217    ///
218    /// Defaults to [`DEFAULT_USER_AGENT`].
219    ///
220    /// # Notes
221    ///
222    /// This may be helpful for bypassing certain firewalls.
223    // Faking the user agent is necessary for some websites, unfortunately.
224    // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
225    #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
226    user_agent: String,
227
228    /// When `true`, accept invalid SSL certificates.
229    ///
230    /// # Warning
231    ///
232    /// You should think very carefully before allowing invalid SSL
233    /// certificates. It will accept any certificate for any site to be trusted
234    /// including expired certificates. This introduces significant
235    /// vulnerabilities, and should only be used as a last resort.
236    // TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
237    allow_insecure: bool,
238
239    /// Set of accepted URL schemes.
240    ///
241    /// Only links with matched URI schemes are checked. This has no effect when
242    /// it's empty.
243    schemes: HashSet<String>,
244
245    /// Default [headers] for every request.
246    ///
247    /// This allows working around validation issues on some websites. See also
248    /// [here] for usage examples.
249    ///
250    /// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
251    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
252    custom_headers: HeaderMap,
253
254    /// HTTP method used for requests, e.g. `GET` or `HEAD`.
255    #[builder(default = reqwest::Method::GET)]
256    method: reqwest::Method,
257
258    /// Set of accepted return codes / status codes.
259    ///
260    /// Unmatched return codes/ status codes are deemed as errors.
261    #[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
262    accepted: HashSet<StatusCode>,
263
264    /// Response timeout per request.
265    timeout: Option<Duration>,
266
267    /// Base for resolving paths.
268    ///
269    /// E.g. if the base is `/home/user/` and the path is `file.txt`, the
270    /// resolved path would be `/home/user/file.txt`.
271    base: Option<Base>,
272
273    /// Initial time between retries of failed requests.
274    ///
275    /// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
276    ///
277    /// # Notes
278    ///
279    /// For each request, the wait time increases using an exponential backoff
280    /// mechanism. For example, if the value is 1 second, then it waits for
281    /// 2 ^ (N-1) seconds before the N-th retry.
282    ///
283    /// This prevents spending too much system resources on slow responders and
284    /// prioritizes other requests.
285    #[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
286    retry_wait_time: Duration,
287
288    /// When `true`, requires using HTTPS when it's available.
289    ///
290    /// This would treat unencrypted links as errors when HTTPS is available.
291    /// It has no effect on non-HTTP schemes or if the URL doesn't support
292    /// HTTPS.
293    require_https: bool,
294
295    /// Cookie store used for requests.
296    ///
297    /// See <https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store>
298    cookie_jar: Option<Arc<CookieStoreMutex>>,
299
300    /// Enable the checking of fragments in links.
301    include_fragments: bool,
302
303    /// Enable the checking of wikilinks in markdown files.
304    /// Note that base must not be `None` if you set this `true`.
305    include_wikilinks: bool,
306
307    /// Requests run through this chain where each item in the chain
308    /// can modify the request. A chained item can also decide to exit
309    /// early and return a status, so that subsequent chain items are
310    /// skipped and the lychee-internal request chain is not activated.
311    plugin_request_chain: RequestChain,
312
313    /// Global rate limiting configuration that applies as defaults to all hosts
314    rate_limit_config: RateLimitConfig,
315
316    /// Per-host configuration overrides
317    hosts: HostConfigs,
318}
319
320impl Default for ClientBuilder {
321    #[inline]
322    fn default() -> Self {
323        Self::builder().build()
324    }
325}
326
327impl ClientBuilder {
328    /// Instantiates a [`Client`].
329    ///
330    /// # Errors
331    ///
332    /// Returns an `Err` if:
333    /// - The user-agent contains characters other than ASCII 32-127.
334    /// - The reqwest client cannot be instantiated. This occurs if a TLS
335    ///   backend cannot be initialized or the resolver fails to load the system
336    ///   configuration. See [here].
337    /// - The GitHub client cannot be created. Since the implementation also
338    ///   uses reqwest under the hood, this errors in the same circumstances as
339    ///   the last one.
340    ///
341    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
342    pub fn client(self) -> Result<Client> {
343        let redirect_history = RedirectHistory::new();
344        let reqwest_client = self
345            .build_client(&redirect_history)?
346            .build()
347            .map_err(ErrorKind::BuildRequestClient)?;
348
349        let client_map = self.build_host_clients(&redirect_history)?;
350
351        let host_pool = HostPool::new(
352            self.rate_limit_config,
353            self.hosts,
354            reqwest_client,
355            client_map,
356        );
357
358        let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
359            Some(token) if !token.is_empty() => Some(
360                Octocrab::builder()
361                    .personal_token(token.to_string())
362                    .build()
363                    // this is essentially the same `reqwest::ClientBuilder::build` error
364                    // see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
365                    .map_err(|e: octocrab::Error| ErrorKind::BuildGithubClient(Box::new(e)))?,
366            ),
367            _ => None,
368        };
369
370        let filter = Filter {
371            includes: self.includes.map(Into::into),
372            excludes: self.excludes.map(Into::into),
373            schemes: self.schemes,
374            // exclude_all_private option turns on all "private" excludes,
375            // including private IPs, link-local IPs and loopback IPs
376            exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
377            exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
378            exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
379            include_mail: self.include_mail,
380        };
381
382        let website_checker = WebsiteChecker::new(
383            self.method,
384            self.retry_wait_time,
385            redirect_history.clone(),
386            self.max_retries,
387            self.accepted,
388            github_client,
389            self.require_https,
390            self.plugin_request_chain,
391            self.include_fragments,
392            Arc::new(host_pool),
393        );
394
395        Ok(Client {
396            remaps: self.remaps,
397            filter,
398            email_checker: MailChecker::new(self.timeout),
399            website_checker,
400            file_checker: FileChecker::new(
401                self.base,
402                self.fallback_extensions,
403                self.index_files,
404                self.include_fragments,
405                self.include_wikilinks,
406            )?,
407        })
408    }
409
410    /// Build the host-specific clients with their host-specific headers
411    fn build_host_clients(&self, redirect_history: &RedirectHistory) -> Result<ClientMap> {
412        self.hosts
413            .iter()
414            .map(|(host, config)| {
415                let mut headers = self.default_headers()?;
416                headers.extend(config.headers.clone());
417                let client = self
418                    .build_client(redirect_history)?
419                    .default_headers(headers)
420                    .build()
421                    .map_err(ErrorKind::BuildRequestClient)?;
422                Ok((HostKey::from(host.as_str()), client))
423            })
424            .collect()
425    }
426
427    /// Create a [`reqwest::ClientBuilder`] based on various fields
428    fn build_client(&self, redirect_history: &RedirectHistory) -> Result<reqwest::ClientBuilder> {
429        let mut builder = reqwest::ClientBuilder::new()
430            .gzip(true)
431            .default_headers(self.default_headers()?)
432            .danger_accept_invalid_certs(self.allow_insecure)
433            .connect_timeout(CONNECT_TIMEOUT)
434            .tcp_keepalive(TCP_KEEPALIVE)
435            .redirect(redirect_policy(
436                redirect_history.clone(),
437                self.max_redirects,
438            ));
439
440        if let Some(cookie_jar) = self.cookie_jar.clone() {
441            builder = builder.cookie_provider(cookie_jar);
442        }
443
444        if let Some(min_tls) = self.min_tls_version {
445            builder = builder.min_tls_version(min_tls);
446        }
447
448        if let Some(timeout) = self.timeout {
449            builder = builder.timeout(timeout);
450        }
451
452        Ok(builder)
453    }
454
455    fn default_headers(&self) -> Result<HeaderMap> {
456        let user_agent = self.user_agent.clone();
457        let mut headers = self.custom_headers.clone();
458
459        if let Some(prev_user_agent) =
460            headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
461        {
462            debug!(
463                "Found user-agent in headers: {}. Overriding it with {user_agent}.",
464                prev_user_agent.to_str().unwrap_or("�"),
465            );
466        }
467
468        headers.insert(
469            header::TRANSFER_ENCODING,
470            HeaderValue::from_static("chunked"),
471        );
472
473        Ok(headers)
474    }
475}
476
477/// Create our custom [`redirect::Policy`] in order to stop following redirects
478/// once `max_redirects` is reached and to record redirections for reporting.
479fn redirect_policy(redirect_history: RedirectHistory, max_redirects: usize) -> redirect::Policy {
480    redirect::Policy::custom(move |attempt| {
481        if attempt.previous().len() > max_redirects {
482            attempt.stop()
483        } else {
484            redirect_history.record_redirects(&attempt);
485            debug!("Following redirect to {}", attempt.url());
486            attempt.follow()
487        }
488    })
489}
490
491/// Handles incoming requests and returns responses.
492///
493/// See [`ClientBuilder`] which contains sane defaults for all configuration
494/// options.
495#[derive(Debug, Clone)]
496pub struct Client {
497    /// Optional remapping rules for URIs matching pattern.
498    remaps: Option<Remaps>,
499
500    /// Rules to decide whether a given link should be checked or ignored.
501    filter: Filter,
502
503    /// A checker for website URLs.
504    website_checker: WebsiteChecker,
505
506    /// A checker for file URLs.
507    file_checker: FileChecker,
508
509    /// A checker for email URLs.
510    email_checker: MailChecker,
511}
512
513impl Client {
514    /// Get `HostPool`
515    #[must_use]
516    pub fn host_pool(&self) -> Arc<HostPool> {
517        self.website_checker.host_pool()
518    }
519
520    /// Check a single request.
521    ///
522    /// `request` can be either a [`Request`] or a type that can be converted
523    /// into it. In any case, it must represent a valid URI.
524    ///
525    /// # Errors
526    ///
527    /// Returns an `Err` if:
528    /// - `request` does not represent a valid URI.
529    /// - Encrypted connection for a HTTP URL is available but unused. (Only
530    ///   checked when `Client::require_https` is `true`.)
531    #[allow(clippy::missing_panics_doc)]
532    pub async fn check<T, E>(&self, request: T) -> Result<Response>
533    where
534        Request: TryFrom<T, Error = E>,
535        ErrorKind: From<E>,
536    {
537        let Request {
538            ref mut uri,
539            credentials,
540            source,
541            ..
542        } = request.try_into()?;
543
544        self.remap(uri)?;
545
546        if self.is_excluded(uri) {
547            return Ok(Response::new(uri.clone(), Status::Excluded, source.into()));
548        }
549
550        let status = match uri.scheme() {
551            _ if uri.is_tel() => Status::Excluded, // We don't check tel: URIs
552            _ if uri.is_file() => self.check_file(uri).await,
553            _ if uri.is_mail() => self.check_mail(uri).await,
554            _ => self.check_website(uri, credentials).await?,
555        };
556
557        Ok(Response::new(uri.clone(), status, source.into()))
558    }
559
560    /// Check a single file using the file checker.
561    pub async fn check_file(&self, uri: &Uri) -> Status {
562        self.file_checker.check(uri).await
563    }
564
565    /// Remap `uri` using the client-defined remapping rules.
566    ///
567    /// # Errors
568    ///
569    /// Returns an `Err` if the final, remapped `uri` is not a valid URI.
570    pub fn remap(&self, uri: &mut Uri) -> Result<()> {
571        if let Some(ref remaps) = self.remaps {
572            uri.url = remaps.remap(&uri.url)?;
573        }
574        Ok(())
575    }
576
577    /// Returns whether the given `uri` should be ignored from checking.
578    #[must_use]
579    pub fn is_excluded(&self, uri: &Uri) -> bool {
580        self.filter.is_excluded(uri)
581    }
582
583    /// Checks the given URI of a website.
584    ///
585    /// # Errors
586    ///
587    /// This returns an `Err` if
588    /// - The URI is invalid.
589    /// - The request failed.
590    /// - The response status code is not accepted.
591    /// - The URI cannot be converted to HTTPS.
592    pub async fn check_website(
593        &self,
594        uri: &Uri,
595        credentials: Option<BasicAuthCredentials>,
596    ) -> Result<Status> {
597        self.website_checker.check_website(uri, credentials).await
598    }
599
600    /// Checks a `mailto` URI.
601    pub async fn check_mail(&self, uri: &Uri) -> Status {
602        self.email_checker.check_mail(uri).await
603    }
604}
605
606/// A shorthand function to check a single URI.
607///
608/// This provides the simplest link check utility without having to create a
609/// [`Client`]. For more complex scenarios, see documentation of
610/// [`ClientBuilder`] instead.
611///
612/// # Errors
613///
614/// Returns an `Err` if:
615/// - The request client cannot be built (see [`ClientBuilder::client`] for
616///   failure cases).
617/// - The request cannot be checked (see [`Client::check`] for failure cases).
618pub async fn check<T, E>(request: T) -> Result<Response>
619where
620    Request: TryFrom<T, Error = E>,
621    ErrorKind: From<E>,
622{
623    let client = ClientBuilder::builder().build().client()?;
624    client.check(request).await
625}
626
627#[cfg(test)]
628mod tests {
629    use std::{
630        fs::File,
631        time::{Duration, Instant},
632    };
633
634    use async_trait::async_trait;
635    use http::{StatusCode, header::HeaderMap};
636    use reqwest::header;
637    use tempfile::tempdir;
638    use test_utils::get_mock_client_response;
639    use test_utils::mock_server;
640    use test_utils::redirecting_mock_server;
641    use wiremock::{
642        Mock,
643        matchers::{method, path},
644    };
645
646    use super::ClientBuilder;
647    use crate::{
648        ErrorKind, Redirect, Redirects, Request, Status, Uri,
649        chain::{ChainResult, Handler, RequestChain},
650    };
651
652    #[tokio::test]
653    async fn test_nonexistent() {
654        let mock_server = mock_server!(StatusCode::NOT_FOUND);
655        let res = get_mock_client_response!(mock_server.uri()).await;
656
657        assert!(res.status().is_error());
658    }
659
660    #[tokio::test]
661    async fn test_nonexistent_with_path() {
662        let res = get_mock_client_response!("http://127.0.0.1/invalid").await;
663        assert!(res.status().is_error());
664    }
665
666    #[tokio::test]
667    async fn test_github() {
668        let res = get_mock_client_response!("https://github.com/lycheeverse/lychee").await;
669        assert!(res.status().is_success());
670    }
671
672    #[tokio::test]
673    async fn test_github_nonexistent_repo() {
674        let res = get_mock_client_response!("https://github.com/lycheeverse/not-lychee").await;
675        assert!(res.status().is_error());
676    }
677
678    #[tokio::test]
679    async fn test_github_nonexistent_file() {
680        let res = get_mock_client_response!(
681            "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
682        )
683        .await;
684        assert!(res.status().is_error());
685    }
686
687    #[tokio::test]
688    async fn test_youtube() {
689        // This is applying a quirk. See the quirks module.
690        let res = get_mock_client_response!("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
691        assert!(res.status().is_success());
692
693        let res = get_mock_client_response!("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
694        assert!(res.status().is_error());
695    }
696
697    #[tokio::test]
698    async fn test_basic_auth() {
699        let mut r: Request = "https://authenticationtest.com/HTTPAuth/"
700            .try_into()
701            .unwrap();
702
703        let res = get_mock_client_response!(r.clone()).await;
704        assert_eq!(res.status().code(), Some(401.try_into().unwrap()));
705
706        r.credentials = Some(crate::BasicAuthCredentials {
707            username: "user".into(),
708            password: "pass".into(),
709        });
710
711        let res = get_mock_client_response!(r).await;
712        assert!(matches!(
713            res.status(),
714            Status::Redirected(StatusCode::OK, _)
715        ));
716    }
717
718    #[tokio::test]
719    async fn test_non_github() {
720        let mock_server = mock_server!(StatusCode::OK);
721        let res = get_mock_client_response!(mock_server.uri()).await;
722
723        assert!(res.status().is_success());
724    }
725
726    #[tokio::test]
727    async fn test_invalid_ssl() {
728        let res = get_mock_client_response!("https://expired.badssl.com/").await;
729
730        assert!(res.status().is_error());
731
732        // Same, but ignore certificate error
733        let res = ClientBuilder::builder()
734            .allow_insecure(true)
735            .build()
736            .client()
737            .unwrap()
738            .check("https://expired.badssl.com/")
739            .await
740            .unwrap();
741        assert!(res.status().is_success());
742    }
743
744    #[tokio::test]
745    async fn test_file() {
746        let dir = tempdir().unwrap();
747        let file = dir.path().join("temp");
748        File::create(file).unwrap();
749        let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap());
750
751        let res = get_mock_client_response!(uri).await;
752        assert!(res.status().is_success());
753    }
754
755    #[tokio::test]
756    async fn test_custom_headers() {
757        // See https://github.com/rust-lang/crates.io/issues/788
758        let mut custom = HeaderMap::new();
759        custom.insert(header::ACCEPT, "text/html".parse().unwrap());
760        let res = ClientBuilder::builder()
761            .custom_headers(custom)
762            .build()
763            .client()
764            .unwrap()
765            .check("https://crates.io/crates/lychee")
766            .await
767            .unwrap();
768        assert!(res.status().is_success());
769    }
770
771    #[tokio::test]
772    async fn test_exclude_mail_by_default() {
773        let client = ClientBuilder::builder()
774            .exclude_all_private(true)
775            .build()
776            .client()
777            .unwrap();
778        assert!(client.is_excluded(&Uri {
779            url: "mailto://mail@example.com".try_into().unwrap()
780        }));
781    }
782
783    #[tokio::test]
784    async fn test_include_mail() {
785        let client = ClientBuilder::builder()
786            .include_mail(false)
787            .exclude_all_private(true)
788            .build()
789            .client()
790            .unwrap();
791        assert!(client.is_excluded(&Uri {
792            url: "mailto://mail@example.com".try_into().unwrap()
793        }));
794
795        let client = ClientBuilder::builder()
796            .include_mail(true)
797            .exclude_all_private(true)
798            .build()
799            .client()
800            .unwrap();
801        assert!(!client.is_excluded(&Uri {
802            url: "mailto://mail@example.com".try_into().unwrap()
803        }));
804    }
805
806    #[tokio::test]
807    async fn test_include_tel() {
808        let client = ClientBuilder::builder().build().client().unwrap();
809        assert!(client.is_excluded(&Uri {
810            url: "tel:1234567890".try_into().unwrap()
811        }));
812    }
813
814    #[tokio::test]
815    async fn test_require_https() {
816        let client = ClientBuilder::builder().build().client().unwrap();
817        let res = client.check("http://example.com").await.unwrap();
818        assert!(res.status().is_success());
819
820        // Same request will fail if HTTPS is required
821        let client = ClientBuilder::builder()
822            .require_https(true)
823            .build()
824            .client()
825            .unwrap();
826        let res = client.check("http://example.com").await.unwrap();
827        assert!(res.status().is_error());
828    }
829
830    #[tokio::test]
831    async fn test_timeout() {
832        // Note: this checks response timeout, not connect timeout.
833        // To check connect timeout, we'd have to do something more involved,
834        // see: https://github.com/LukeMathWalker/wiremock-rs/issues/19
835        let mock_delay = Duration::from_millis(20);
836        let checker_timeout = Duration::from_millis(10);
837        assert!(mock_delay > checker_timeout);
838
839        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));
840
841        let client = ClientBuilder::builder()
842            .timeout(checker_timeout)
843            .max_retries(0u64)
844            .build()
845            .client()
846            .unwrap();
847
848        let res = client.check(mock_server.uri()).await.unwrap();
849        assert!(res.status().is_timeout());
850    }
851
852    #[tokio::test]
853    async fn test_exponential_backoff() {
854        let mock_delay = Duration::from_millis(20);
855        let checker_timeout = Duration::from_millis(10);
856        assert!(mock_delay > checker_timeout);
857
858        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));
859
860        // Perform a warm-up request to ensure the lazy regexes
861        // in lychee-lib/src/quirks/mod.rs are compiled.
862        // On some platforms, this can take some time(approx. 110ms),
863        // which should not be counted in the test.
864        let warm_up_client = ClientBuilder::builder()
865            .max_retries(0_u64)
866            .build()
867            .client()
868            .unwrap();
869        let _res = warm_up_client.check(mock_server.uri()).await.unwrap();
870
871        let client = ClientBuilder::builder()
872            .timeout(checker_timeout)
873            .max_retries(3_u64)
874            .retry_wait_time(Duration::from_millis(50))
875            .build()
876            .client()
877            .unwrap();
878
879        // Summary:
880        // 1. First request fails with timeout (after 10ms)
881        // 2. Retry after 50ms (total 60ms)
882        // 3. Second request fails with timeout (after 10ms)
883        // 4. Retry after 100ms (total 160ms)
884        // 5. Third request fails with timeout (after 10ms)
885        // 6. Retry after 200ms (total 360ms)
886        // Total: 360ms
887
888        let start = Instant::now();
889        let res = client.check(mock_server.uri()).await.unwrap();
890        let end = start.elapsed();
891
892        assert!(res.status().is_error());
893
894        // on slow connections, this might take a bit longer than nominal
895        // backed-off timeout (7 secs)
896        assert!((350..=550).contains(&end.as_millis()));
897    }
898
899    #[tokio::test]
900    async fn test_avoid_reqwest_panic() {
901        let client = ClientBuilder::builder().build().client().unwrap();
902        // This request will result in an Unsupported status, but it won't panic
903        let res = client.check("http://\"").await.unwrap();
904
905        assert!(matches!(
906            res.status(),
907            Status::Unsupported(ErrorKind::BuildRequestClient(_))
908        ));
909        assert!(res.status().is_unsupported());
910    }
911
912    #[tokio::test]
913    async fn test_max_redirects() {
914        let mock_server = wiremock::MockServer::start().await;
915
916        let redirect_uri = format!("{}/redirect", &mock_server.uri());
917        let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
918            .insert_header("Location", redirect_uri.as_str());
919
920        let redirect_count = 15usize;
921        let initial_invocation = 1;
922
923        // Set up infinite redirect loop
924        Mock::given(method("GET"))
925            .and(path("/redirect"))
926            .respond_with(move |_: &_| redirect.clone())
927            .expect(initial_invocation + redirect_count as u64)
928            .mount(&mock_server)
929            .await;
930
931        let res = ClientBuilder::builder()
932            .max_redirects(redirect_count)
933            .build()
934            .client()
935            .unwrap()
936            .check(redirect_uri.clone())
937            .await
938            .unwrap();
939
940        assert_eq!(
941            res.status(),
942            &Status::Error(ErrorKind::RejectedStatusCode(
943                StatusCode::PERMANENT_REDIRECT
944            ))
945        );
946    }
947
948    #[tokio::test]
949    async fn test_redirects() {
950        redirecting_mock_server!(async |redirect_url: Url, ok_url| {
951            let res = ClientBuilder::builder()
952                .max_redirects(1_usize)
953                .build()
954                .client()
955                .unwrap()
956                .check(Uri::from((redirect_url).clone()))
957                .await
958                .unwrap();
959
960            let mut redirects = Redirects::new(redirect_url);
961            redirects.push(Redirect {
962                url: ok_url,
963                code: StatusCode::PERMANENT_REDIRECT,
964            });
965            assert_eq!(res.status(), &Status::Redirected(StatusCode::OK, redirects));
966        })
967        .await;
968    }
969
970    #[tokio::test]
971    async fn test_unsupported_scheme() {
972        let examples = vec![
973            "ftp://example.com",
974            "gopher://example.com",
975            "slack://example.com",
976        ];
977
978        for example in examples {
979            let client = ClientBuilder::builder().build().client().unwrap();
980            let res = client.check(example).await.unwrap();
981            assert!(res.status().is_unsupported());
982        }
983    }
984
985    #[tokio::test]
986    async fn test_chain() {
987        use reqwest::Request;
988
989        #[derive(Debug)]
990        struct ExampleHandler();
991
992        #[async_trait]
993        impl Handler<Request, Status> for ExampleHandler {
994            async fn handle(&mut self, _: Request) -> ChainResult<Request, Status> {
995                ChainResult::Done(Status::Excluded)
996            }
997        }
998
999        let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]);
1000
1001        let client = ClientBuilder::builder()
1002            .plugin_request_chain(chain)
1003            .build()
1004            .client()
1005            .unwrap();
1006
1007        let result = client.check("http://example.com");
1008        let res = result.await.unwrap();
1009        assert_eq!(res.status(), &Status::Excluded);
1010    }
1011}