scrapling_spider/error.rs
1//! Error types for the spider crate.
2//!
3//! All fallible operations in `scrapling-spider` return [`Result<T>`], which is an
4//! alias for `std::result::Result<T, SpiderError>`. The [`SpiderError`] enum has a
5//! variant for each subsystem (fetch, browser, session, checkpoint, robots.txt, and
6//! configuration) so callers can pattern-match on the source of a failure and decide
7//! how to handle it -- for example, retrying on transient fetch errors while
8//! immediately aborting on configuration mistakes.
9//!
//! [`SpiderError`] implements `From<scrapling_fetch::FetchError>` and
//! `From<scrapling_browser::BrowserError>`, so the `?` operator converts errors from
//! those crates automatically when they are called from spider code.
13
14use std::fmt;
15
16/// The central error type for everything that can go wrong during a crawl.
17///
18/// Each variant wraps either a structured error from a downstream crate or a
19/// human-readable `String` describing the problem. You will typically encounter
20/// this type through the [`Result`] alias rather than constructing it directly.
21#[derive(Debug)]
22pub enum SpiderError {
23 /// A configuration validation error, raised when spider settings are invalid
24 /// (for example, a negative checkpoint interval). The string describes what
25 /// was wrong with the configuration.
26 Config(String),
27 /// An error originating from the HTTP fetch layer (`scrapling-fetch`). This
28 /// wraps the underlying `FetchError` so you can inspect network-level details
29 /// such as connection timeouts or DNS failures.
30 Fetch(scrapling_fetch::FetchError),
31 /// An error originating from the browser automation layer (`scrapling-browser`).
32 /// This wraps the underlying `BrowserError`, which covers headless-browser
33 /// launch failures, page navigation errors, and similar issues.
34 Browser(scrapling_browser::BrowserError),
35 /// A session management error, raised when a requested session ID does not
36 /// exist or when a duplicate session is registered. Check the contained message
37 /// for the list of available session IDs.
38 Session(String),
39 /// A checkpoint save or restore error, raised when the crawler cannot write or
40 /// read its state snapshot on disk. Common causes include missing directories
41 /// and permission problems.
42 Checkpoint(String),
43 /// A robots.txt parsing or enforcement error. In practice this variant is
44 /// rarely surfaced because the robots.txt manager degrades gracefully (treating
45 /// unparseable files as "allow all"), but it exists for explicit error paths.
46 RobotsTxt(String),
47 /// A catch-all error for uncategorized failures that do not fit into any other
48 /// variant. Use this sparingly; prefer a more specific variant when one applies.
49 Other(String),
50}
51
52impl fmt::Display for SpiderError {
53 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54 match self {
55 Self::Config(e) => write!(f, "config error: {e}"),
56 Self::Fetch(e) => write!(f, "fetch error: {e}"),
57 Self::Browser(e) => write!(f, "browser error: {e}"),
58 Self::Session(e) => write!(f, "session error: {e}"),
59 Self::Checkpoint(e) => write!(f, "checkpoint error: {e}"),
60 Self::RobotsTxt(e) => write!(f, "robots.txt error: {e}"),
61 Self::Other(e) => write!(f, "{e}"),
62 }
63 }
64}
65
66impl std::error::Error for SpiderError {}
67
68impl From<scrapling_fetch::FetchError> for SpiderError {
69 fn from(e: scrapling_fetch::FetchError) -> Self {
70 Self::Fetch(e)
71 }
72}
73
74impl From<scrapling_browser::BrowserError> for SpiderError {
75 fn from(e: scrapling_browser::BrowserError) -> Self {
76 Self::Browser(e)
77 }
78}
79
80/// A convenience alias so every function in this crate can write `Result<T>`
81/// instead of `std::result::Result<T, SpiderError>`. This is re-exported from
82/// the crate root, so downstream code can use `scrapling_spider::Result` directly.
83pub type Result<T> = std::result::Result<T, SpiderError>;