Skip to main content

scrapling_spider/
error.rs

1//! Error types for the spider crate.
2//!
3//! All fallible operations in `scrapling-spider` return [`Result<T>`], which is an
4//! alias for `std::result::Result<T, SpiderError>`. The [`SpiderError`] enum has a
5//! variant for each subsystem (fetch, browser, session, checkpoint, robots.txt, and
6//! configuration) so callers can pattern-match on the source of a failure and decide
7//! how to handle it -- for example, retrying on transient fetch errors while
8//! immediately aborting on configuration mistakes.
9//!
10//! Both `scrapling_fetch::FetchError` and `scrapling_browser::BrowserError` implement
11//! `Into<SpiderError>`, so the `?` operator works seamlessly when calling into those
12//! crates from spider code.
13
14use std::fmt;
15
/// The central error type for everything that can go wrong during a crawl.
///
/// Each variant wraps either a structured error from a downstream crate or a
/// human-readable `String` describing the problem. You will typically encounter
/// this type through the [`Result`] alias rather than constructing it directly.
///
/// The `Display` implementation in this module prefixes the payload with a short
/// label naming the subsystem (for example `config error: ...`). The one
/// exception is [`SpiderError::Other`], whose message is printed verbatim.
#[derive(Debug)]
pub enum SpiderError {
    /// A configuration validation error, raised when spider settings are invalid
    /// (for example, a negative checkpoint interval). The string describes what
    /// was wrong with the configuration.
    ///
    /// Displayed as `config error: <message>`.
    Config(String),
    /// An error originating from the HTTP fetch layer (`scrapling-fetch`). This
    /// wraps the underlying `FetchError` so you can inspect network-level details
    /// such as connection timeouts or DNS failures.
    ///
    /// A `From<scrapling_fetch::FetchError>` impl in this module builds this
    /// variant, so `?` converts fetch errors automatically. Displayed as
    /// `fetch error: <inner error>`.
    Fetch(scrapling_fetch::FetchError),
    /// An error originating from the browser automation layer (`scrapling-browser`).
    /// This wraps the underlying `BrowserError`, which covers headless-browser
    /// launch failures, page navigation errors, and similar issues.
    ///
    /// A `From<scrapling_browser::BrowserError>` impl in this module builds this
    /// variant, so `?` converts browser errors automatically. Displayed as
    /// `browser error: <inner error>`.
    Browser(scrapling_browser::BrowserError),
    /// A session management error, raised when a requested session ID does not
    /// exist or when a duplicate session is registered. Check the contained message
    /// for the list of available session IDs.
    ///
    /// Displayed as `session error: <message>`.
    Session(String),
    /// A checkpoint save or restore error, raised when the crawler cannot write or
    /// read its state snapshot on disk. Common causes include missing directories
    /// and permission problems.
    ///
    /// Displayed as `checkpoint error: <message>`.
    Checkpoint(String),
    /// A robots.txt parsing or enforcement error. In practice this variant is
    /// rarely surfaced because the robots.txt manager degrades gracefully (treating
    /// unparseable files as "allow all"), but it exists for explicit error paths.
    ///
    /// Displayed as `robots.txt error: <message>`.
    RobotsTxt(String),
    /// A catch-all error for uncategorized failures that do not fit into any other
    /// variant. Use this sparingly; prefer a more specific variant when one applies.
    ///
    /// Unlike the other variants, this one is displayed as the bare message with
    /// no subsystem prefix.
    Other(String),
}
51
52impl fmt::Display for SpiderError {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::Config(e) => write!(f, "config error: {e}"),
56            Self::Fetch(e) => write!(f, "fetch error: {e}"),
57            Self::Browser(e) => write!(f, "browser error: {e}"),
58            Self::Session(e) => write!(f, "session error: {e}"),
59            Self::Checkpoint(e) => write!(f, "checkpoint error: {e}"),
60            Self::RobotsTxt(e) => write!(f, "robots.txt error: {e}"),
61            Self::Other(e) => write!(f, "{e}"),
62        }
63    }
64}
65
66impl std::error::Error for SpiderError {}
67
68impl From<scrapling_fetch::FetchError> for SpiderError {
69    fn from(e: scrapling_fetch::FetchError) -> Self {
70        Self::Fetch(e)
71    }
72}
73
74impl From<scrapling_browser::BrowserError> for SpiderError {
75    fn from(e: scrapling_browser::BrowserError) -> Self {
76        Self::Browser(e)
77    }
78}
79
/// A convenience alias so every function in this crate can write `Result<T>`
/// instead of `std::result::Result<T, SpiderError>`. This is re-exported from
/// the crate root, so downstream code can use `scrapling_spider::Result` directly.
///
/// Only the success type is generic; the error type is always [`SpiderError`].
/// Importing this alias by name shadows the prelude `Result` in that scope, so
/// write `std::result::Result<T, E>` explicitly when another error type is needed.
pub type Result<T> = std::result::Result<T, SpiderError>;