Skip to main content

lychee_lib/types/
error.rs

1use http::StatusCode;
2use serde::{Serialize, Serializer};
3use std::error::Error;
4use std::hash::Hash;
5use std::{convert::Infallible, path::PathBuf};
6use thiserror::Error;
7use tokio::task::JoinError;
8
9use super::InputContent;
10use crate::types::StatusCodeSelectorError;
11use crate::{Uri, basic_auth::BasicAuthExtractorError, utils};
12
13/// Kinds of status errors
14/// Note: The error messages can change over time, so don't match on the output
15#[derive(Error, Debug)]
16#[non_exhaustive]
17pub enum ErrorKind {
18    /// Network error while handling request.
19    /// This does not include erroneous status codes, `RejectedStatusCode` will be used in that case.
20    #[error("Network error: {analysis} ({error})", analysis=utils::reqwest::analyze_error_chain(.0), error=.0)]
21    NetworkRequest(#[source] reqwest::Error),
22    /// Cannot read the body of the received response
23    #[error("Failed to read response body: {0}")]
24    ReadResponseBody(#[source] reqwest::Error),
25    /// The network client required for making requests cannot be created
26    #[error("Failed to create HTTP request client: {0}")]
27    BuildRequestClient(#[source] reqwest::Error),
28
29    /// Network error while using GitHub API
30    #[error("Network error while using GitHub client")]
31    GithubRequest(#[from] Box<octocrab::Error>),
32
33    /// Error while executing a future on the Tokio runtime
34    #[error("Task failed to execute to completion: {0}")]
35    RuntimeJoin(#[from] JoinError),
36
37    /// Error while converting a file to an input
38    #[error("Cannot read input content from file '{1}'")]
39    ReadFileInput(#[source] std::io::Error, PathBuf),
40    /// Error while reading an input URL
41    #[error(
42        "Cannot read input content from URL: status code {0}. To check links in error pages, download and check locally instead."
43    )]
44    ReadInputUrlStatusCode(StatusCode),
45
46    /// Error while reading stdin as input
47    #[error("Cannot read content from stdin: {0}")]
48    ReadStdinInput(#[from] std::io::Error),
49
50    /// Errors which can occur when attempting to interpret a sequence of u8 as a string
51    ///
52    #[error(
53        "Encountered invalid UTF-8 sequence, while trying to interpret bytes UTF-8 string: {0}"
54    )]
55    Utf8(#[from] std::str::Utf8Error),
56
57    /// The GitHub client required for making requests cannot be created
58    #[error("Failed to create GitHub client")]
59    BuildGithubClient(#[source] Box<octocrab::Error>),
60
61    /// Invalid GitHub URL
62    #[error("GitHub URL is invalid: {0}")]
63    InvalidGithubUrl(String),
64
65    /// The input is empty and not accepted as a valid URL
66    #[error("Empty URL found but a URL must not be empty")]
67    EmptyUrl,
68
69    /// The given string can not be parsed into a valid URL, e-mail address, or file path
70    #[error("Cannot parse '{1}' into a URL: {0}")]
71    ParseUrl(#[source] url::ParseError, String),
72
73    /// The given string is a root-relative link and cannot be parsed without a known root-dir
74    #[error("Cannot resolve root-relative link '{0}'")]
75    RootRelativeLinkWithoutRoot(String),
76
77    /// The given URI cannot be converted to a file path
78    #[error("File not found. Check if file exists and path is correct")]
79    InvalidFilePath(Uri),
80
81    /// The given URI's fragment could not be found within the page content
82    #[error("Cannot find fragment")]
83    InvalidFragment(Uri),
84
85    /// Cannot resolve local directory link using the configured index files
86    #[error("Cannot find index file within directory")]
87    InvalidIndexFile(Vec<String>),
88
89    /// The given path cannot be converted to a URI
90    #[error("Cannot convert path to URL: '{0}'")]
91    InvalidUrlFromPath(PathBuf),
92
93    /// The given mail address is unreachable
94    #[error("Unreachable mail address {0}")]
95    UnreachableEmailAddress(Uri, String),
96
97    /// The given header could not be parsed.
98    /// A possible error when converting a `HeaderValue` from a string or byte
99    /// slice.
100    #[error("Invalid HTTP header: {0}")]
101    InvalidHeader(#[from] http::header::InvalidHeaderValue),
102
103    /// The given string can not be parsed into a valid base URL or base directory
104    #[error("Invalid base URL or directory: '{0}'. {1}")]
105    InvalidBase(String, String),
106
107    /// The given URI type is not supported
108    #[error("Unsupported URI type: '{0}'")]
109    UnsupportedUriType(String),
110
111    /// The given input can not be parsed into a valid URI remap
112    #[error("Invalid remap pattern: {0}")]
113    InvalidUrlRemap(String),
114
115    /// The given input is neither a valid file path nor a valid URL
116    #[error(
117        "Input '{0}' not found as file and not a valid URL. Use full URL (e.g., https://example.com) or check file path."
118    )]
119    InvalidInput(String),
120
121    /// Error while traversing an input directory
122    #[error("Cannot traverse input directory: {0}")]
123    DirTraversal(#[from] ignore::Error),
124
125    /// The given glob pattern is not valid
126    #[error("Invalid glob pattern: {0}")]
127    InvalidGlobPattern(#[from] glob::PatternError),
128
129    /// The GitHub API could not be called because of a missing GitHub token.
130    #[error("GitHub token required")]
131    MissingGitHubToken,
132
133    /// Used an insecure URI where a secure variant was reachable
134    #[error("Insecure HTTP URL used, where '{0}' can be used instead")]
135    InsecureURL(Uri),
136
137    /// Error while sending/receiving messages from MPSC channel
138    #[error("Internal communication error, cannot send/receive message over channel: {0}")]
139    Channel(#[from] tokio::sync::mpsc::error::SendError<InputContent>),
140
141    /// A URL without a host was found
142    #[error("URL is missing a hostname")]
143    InvalidUrlHost,
144
145    /// Cannot parse the given URI
146    #[error("The given URI is invalid, check URI syntax: {0}")]
147    InvalidURI(Uri),
148
149    /// The given status code is invalid (not in range 100-999)
150    #[error("Invalid status code: {0}")]
151    InvalidStatusCode(u16),
152
153    /// The given status code was not accepted (this depends on the `accept` configuration)
154    #[error(
155        r#"Rejected status code: {code} {reason} (configurable with "accept" option)"#,
156        code = .0.as_str(),
157        reason = .0.canonical_reason().unwrap_or("Unknown status code")
158    )]
159    RejectedStatusCode(StatusCode),
160
161    /// Regex error
162    #[error("Regular expression error: {0}. Check regex syntax")]
163    Regex(#[from] regex::Error),
164
165    /// Basic authentication extractor error
166    #[error("Basic authentication extraction error: {0}")]
167    BasicAuthExtractorError(#[from] BasicAuthExtractorError),
168
169    /// Cannot handle cookies
170    #[error("Cookie handling error: {0}")]
171    Cookies(String),
172
173    /// Status code selector parse error
174    #[error("Unable to parse status code selector: {0}")]
175    StatusCodeSelectorError(#[from] StatusCodeSelectorError),
176
177    /// Preprocessor command error
178    #[error("Preprocessor command '{command}' failed with '{reason}'")]
179    PreprocessorError {
180        /// The command which did not execute successfully
181        command: String,
182        /// The reason the command failed
183        reason: String,
184    },
185
186    /// The extracted `WikiLink` could not be found by searching the directory
187    #[error("Wikilink {0} not found at {1}")]
188    WikilinkNotFound(Uri, PathBuf),
189
190    /// Invalid base URL for `WikiLink` checking
191    #[error("Invalid base URL for WikiLink checking: {0}")]
192    WikilinkInvalidBase(String),
193}
194
195impl ErrorKind {
196    /// Return more details about the given [`ErrorKind`]
197    ///
198    /// Which additional information we can extract depends on the underlying
199    /// request type. The output is purely meant for humans (e.g. for status
200    /// messages) and future changes are expected.
201    #[must_use]
202    #[allow(clippy::too_many_lines)]
203    pub fn details(&self) -> String {
204        match self {
205            ErrorKind::NetworkRequest(e) => utils::reqwest::analyze_error_chain(e),
206            ErrorKind::GithubRequest(e) => {
207                let detail = if let octocrab::Error::GitHub { source, .. } = &**e {
208                    source.message.clone()
209                } else {
210                    e.to_string()
211                };
212                format!("{self}: {detail}")
213            }
214            ErrorKind::ReadFileInput(e, path) => match e.kind() {
215                std::io::ErrorKind::NotFound => "Check if file path is correct".to_string(),
216                std::io::ErrorKind::PermissionDenied => format!(
217                    "Permission denied: '{}'. Check file permissions",
218                    path.display()
219                ),
220                std::io::ErrorKind::IsADirectory => format!(
221                    "Path is a directory, not a file: '{}'. Check file path",
222                    path.display()
223                ),
224                _ => format!("File read error for '{}': {e}", path.display()),
225            },
226            // This `details()` method never gets called for incorrect CLI
227            // inputs, so whatever we put here, it won't be shown to the user.
228            //
229            // This returns an empty string as a sentinel value because it's handled as a
230            // fatal application error rather than a link-level error.
231            //
232            // TODO: In the future, we should return an Option<String> or separate
233            // application errors from library errors.
234            ErrorKind::ReadInputUrlStatusCode(_) => String::new(),
235            ErrorKind::ParseUrl(e, _url) => {
236                let detail = match e {
237                    url::ParseError::RelativeUrlWithoutBase => {
238                        ": This relative link was found inside an input source that has no base location"
239                    }
240                    _ => "",
241                };
242
243                format!("{self}{detail}")
244            }
245            ErrorKind::RootRelativeLinkWithoutRoot(_) => {
246                format!("{self}: To resolve root-relative links in local files, provide a root dir")
247            }
248            ErrorKind::BuildRequestClient(_) => {
249                format!("{self}: Check system configuration")
250            }
251            ErrorKind::BuildGithubClient(error) => {
252                format!("{self}: {error}. Check token and network connectivity")
253            }
254            ErrorKind::InvalidGithubUrl(_) => {
255                format!("{self}. Check URL syntax")
256            }
257            ErrorKind::InvalidUrlFromPath(_) => {
258                format!("{self}. Check path format")
259            }
260            ErrorKind::UnreachableEmailAddress(_uri, reason) => reason.clone(),
261            ErrorKind::InvalidHeader(_) => {
262                format!("{self}. Check header format")
263            }
264            ErrorKind::UnsupportedUriType(_) => {
265                format!("{self}. Only http, https, file, and mailto are supported")
266            }
267            ErrorKind::InvalidUrlRemap(_) => {
268                format!("{self}. Check remap syntax")
269            }
270            ErrorKind::DirTraversal(_) => {
271                format!("{self}. Check directory permissions")
272            }
273            ErrorKind::InvalidGlobPattern(_) => {
274                format!("{self}. Check pattern syntax")
275            }
276            ErrorKind::MissingGitHubToken => {
277                format!("{self}. Use --github-token flag or GITHUB_TOKEN environment variable")
278            }
279            ErrorKind::InvalidStatusCode(_) => {
280                format!("{self}. Must be in the range 100-999")
281            }
282            ErrorKind::BasicAuthExtractorError(_) => {
283                format!("{self}. {}", "Check credentials format")
284            }
285            ErrorKind::Cookies(_) => {
286                format!("{self}. Check cookie file format")
287            }
288            ErrorKind::StatusCodeSelectorError(_) => {
289                format!("{self}. Check 'accept' and 'cache_exclude_status' configuration")
290            }
291            ErrorKind::InvalidIndexFile(index_files) => {
292                let details = match &index_files[..] {
293                    [] => "Directory links are rejected because index_files is empty".into(),
294                    [name] => format!("An index file ({name}) is required"),
295                    [init @ .., tail] => format!(
296                        "An index file ({}, or {}) is required",
297                        init.join(", "),
298                        tail
299                    ),
300                };
301
302                format!("{self}: {details}")
303            }
304            ErrorKind::InvalidFragment(_)
305            | ErrorKind::RejectedStatusCode(_)
306            | ErrorKind::InvalidFilePath(_)
307            | ErrorKind::InvalidURI(_)
308            | ErrorKind::InvalidInput(_)
309            | ErrorKind::Regex(_)
310            | ErrorKind::Utf8(_)
311            | ErrorKind::ReadResponseBody(_)
312            | ErrorKind::RuntimeJoin(_)
313            | ErrorKind::WikilinkInvalidBase(_)
314            | ErrorKind::Channel(_)
315            | ErrorKind::InsecureURL(_)
316            | ErrorKind::ReadStdinInput(_)
317            | ErrorKind::InvalidBase(_, _)
318            | ErrorKind::WikilinkNotFound(_, _)
319            | ErrorKind::EmptyUrl
320            | ErrorKind::InvalidUrlHost
321            | ErrorKind::PreprocessorError {
322                command: _,
323                reason: _,
324            } => self.to_string(),
325        }
326    }
327
328    /// Return the underlying source of the given [`ErrorKind`]
329    /// if it is a `reqwest::Error`.
330    /// This is useful for extracting the status code of a failed request.
331    /// If the error is not a `reqwest::Error`, `None` is returned.
332    #[must_use]
333    #[allow(clippy::redundant_closure_for_method_calls)]
334    pub(crate) fn reqwest_error(&self) -> Option<&reqwest::Error> {
335        self.source()
336            .and_then(|e| e.downcast_ref::<reqwest::Error>())
337    }
338
339    /// Return the underlying source of the given [`ErrorKind`]
340    /// if it is a `octocrab::Error`.
341    /// This is useful for extracting the status code of a failed request.
342    /// If the error is not a `octocrab::Error`, `None` is returned.
343    #[must_use]
344    #[allow(clippy::redundant_closure_for_method_calls)]
345    pub(crate) fn github_error(&self) -> Option<&octocrab::Error> {
346        self.source()
347            .and_then(|e| e.downcast_ref::<octocrab::Error>())
348    }
349}
350
351#[allow(clippy::match_same_arms)]
352impl PartialEq for ErrorKind {
353    fn eq(&self, other: &Self) -> bool {
354        match (self, other) {
355            (Self::NetworkRequest(e1), Self::NetworkRequest(e2)) => {
356                e1.to_string() == e2.to_string()
357            }
358            (Self::ReadResponseBody(e1), Self::ReadResponseBody(e2)) => {
359                e1.to_string() == e2.to_string()
360            }
361            (Self::BuildRequestClient(e1), Self::BuildRequestClient(e2)) => {
362                e1.to_string() == e2.to_string()
363            }
364            (Self::RuntimeJoin(e1), Self::RuntimeJoin(e2)) => e1.to_string() == e2.to_string(),
365            (Self::ReadFileInput(e1, s1), Self::ReadFileInput(e2, s2)) => {
366                e1.kind() == e2.kind() && s1 == s2
367            }
368            (Self::ReadInputUrlStatusCode(e1), Self::ReadInputUrlStatusCode(e2)) => e1 == e2,
369            (Self::ReadStdinInput(e1), Self::ReadStdinInput(e2)) => e1.kind() == e2.kind(),
370            (Self::GithubRequest(e1), Self::GithubRequest(e2)) => e1.to_string() == e2.to_string(),
371            (Self::InvalidGithubUrl(s1), Self::InvalidGithubUrl(s2)) => s1 == s2,
372            (Self::ParseUrl(s1, e1), Self::ParseUrl(s2, e2)) => s1 == s2 && e1 == e2,
373            (Self::UnreachableEmailAddress(u1, ..), Self::UnreachableEmailAddress(u2, ..)) => {
374                u1 == u2
375            }
376            (Self::InsecureURL(u1), Self::InsecureURL(u2)) => u1 == u2,
377            (Self::InvalidGlobPattern(e1), Self::InvalidGlobPattern(e2)) => {
378                e1.msg == e2.msg && e1.pos == e2.pos
379            }
380            (Self::InvalidHeader(_), Self::InvalidHeader(_))
381            | (Self::MissingGitHubToken, Self::MissingGitHubToken) => true,
382            (Self::InvalidStatusCode(c1), Self::InvalidStatusCode(c2)) => c1 == c2,
383            (Self::InvalidUrlHost, Self::InvalidUrlHost) => true,
384            (Self::InvalidURI(u1), Self::InvalidURI(u2)) => u1 == u2,
385            (Self::Regex(e1), Self::Regex(e2)) => e1.to_string() == e2.to_string(),
386            (Self::DirTraversal(e1), Self::DirTraversal(e2)) => e1.to_string() == e2.to_string(),
387            (Self::Channel(_), Self::Channel(_)) => true,
388            (Self::BasicAuthExtractorError(e1), Self::BasicAuthExtractorError(e2)) => {
389                e1.to_string() == e2.to_string()
390            }
391            (Self::Cookies(e1), Self::Cookies(e2)) => e1 == e2,
392            (Self::InvalidInput(s1), Self::InvalidInput(s2)) => s1 == s2,
393            (Self::InvalidFilePath(u1), Self::InvalidFilePath(u2)) => u1 == u2,
394            (Self::InvalidFragment(u1), Self::InvalidFragment(u2)) => u1 == u2,
395            (Self::InvalidIndexFile(p1), Self::InvalidIndexFile(p2)) => p1 == p2,
396            (Self::InvalidUrlFromPath(p1), Self::InvalidUrlFromPath(p2)) => p1 == p2,
397            (Self::InvalidBase(b1, e1), Self::InvalidBase(b2, e2)) => b1 == b2 && e1 == e2,
398            (Self::InvalidUrlRemap(r1), Self::InvalidUrlRemap(r2)) => r1 == r2,
399            (Self::EmptyUrl, Self::EmptyUrl) => true,
400            (Self::RejectedStatusCode(c1), Self::RejectedStatusCode(c2)) => c1 == c2,
401
402            _ => false,
403        }
404    }
405}
406
407impl Eq for ErrorKind {}
408
409#[allow(clippy::match_same_arms)]
410impl Hash for ErrorKind {
411    fn hash<H>(&self, state: &mut H)
412    where
413        H: std::hash::Hasher,
414    {
415        match self {
416            Self::RuntimeJoin(e) => e.to_string().hash(state),
417            Self::ReadFileInput(e, s) => (e.kind(), s).hash(state),
418            Self::ReadInputUrlStatusCode(c) => c.hash(state),
419            Self::ReadStdinInput(e) => e.kind().hash(state),
420            Self::NetworkRequest(e) => e.to_string().hash(state),
421            Self::ReadResponseBody(e) => e.to_string().hash(state),
422            Self::BuildRequestClient(e) => e.to_string().hash(state),
423            Self::BuildGithubClient(e) => e.to_string().hash(state),
424            Self::GithubRequest(e) => e.to_string().hash(state),
425            Self::InvalidGithubUrl(s) => s.hash(state),
426            Self::DirTraversal(e) => e.to_string().hash(state),
427            Self::InvalidInput(s) => s.hash(state),
428            Self::EmptyUrl => "Empty URL".hash(state),
429            Self::ParseUrl(e, s) => (e.to_string(), s).hash(state),
430            Self::RootRelativeLinkWithoutRoot(s) => s.hash(state),
431            Self::InvalidURI(u) => u.hash(state),
432            Self::InvalidUrlFromPath(p) => p.hash(state),
433            Self::Utf8(e) => e.to_string().hash(state),
434            Self::InvalidFilePath(u) => u.hash(state),
435            Self::InvalidFragment(u) => u.hash(state),
436            Self::InvalidIndexFile(p) => p.hash(state),
437            Self::UnreachableEmailAddress(u, ..) => u.hash(state),
438            Self::InsecureURL(u, ..) => u.hash(state),
439            Self::InvalidBase(base, e) => (base, e).hash(state),
440            Self::UnsupportedUriType(s) => s.hash(state),
441            Self::InvalidUrlRemap(remap) => (remap).hash(state),
442            Self::InvalidHeader(e) => e.to_string().hash(state),
443            Self::InvalidGlobPattern(e) => e.to_string().hash(state),
444            Self::InvalidStatusCode(c) => c.hash(state),
445            Self::RejectedStatusCode(c) => c.hash(state),
446            Self::Channel(e) => e.to_string().hash(state),
447            Self::MissingGitHubToken | Self::InvalidUrlHost => {
448                std::mem::discriminant(self).hash(state);
449            }
450            Self::Regex(e) => e.to_string().hash(state),
451            Self::BasicAuthExtractorError(e) => e.to_string().hash(state),
452            Self::Cookies(e) => e.hash(state),
453            Self::StatusCodeSelectorError(e) => e.to_string().hash(state),
454            Self::PreprocessorError { command, reason } => (command, reason).hash(state),
455            Self::WikilinkNotFound(uri, pathbuf) => (uri, pathbuf).hash(state),
456            Self::WikilinkInvalidBase(e) => e.hash(state),
457        }
458    }
459}
460
461impl Serialize for ErrorKind {
462    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
463    where
464        S: Serializer,
465    {
466        serializer.collect_str(self)
467    }
468}
469
470impl From<Infallible> for ErrorKind {
471    fn from(_: Infallible) -> Self {
472        // tautological
473        unreachable!()
474    }
475}
476
#[cfg(test)]
mod tests {
    use crate::ErrorKind;

    /// Rejected status codes render their canonical reason and point the
    /// user at the "accept" option.
    #[test]
    fn test_error_kind_details() {
        // A rejected 404 mentions the canonical reason phrase
        let not_found = ErrorKind::RejectedStatusCode(http::StatusCode::NOT_FOUND);
        let rendered = not_found.to_string();
        assert!(rendered.contains("Not Found"));

        // A rejected redirect points at the "accept" option in its details
        let moved = ErrorKind::RejectedStatusCode(http::StatusCode::MOVED_PERMANENTLY);
        let details = moved.details();
        assert!(details.contains(r#"(configurable with "accept" option)"#));
    }
}