Skip to main content

scrape_core/
error.rs

//! Error types for scrape-core.
2
3use thiserror::Error;
4
5/// Result type alias using [`enum@Error`].
6pub type Result<T> = std::result::Result<T, Error>;
7
8/// Errors that can occur during HTML parsing and querying.
9#[derive(Debug, Error)]
10pub enum Error {
11    /// Failed to parse HTML document.
12    #[error("failed to parse HTML: {message}")]
13    ParseError {
14        /// Description of what went wrong.
15        message: String,
16    },
17
18    /// Invalid CSS selector syntax.
19    #[error("invalid CSS selector: {selector}")]
20    InvalidSelector {
21        /// The selector string that failed to parse.
22        selector: String,
23    },
24
25    /// Element not found.
26    #[error("element not found: {query}")]
27    NotFound {
28        /// The query that returned no results.
29        query: String,
30    },
31
32    /// Attribute not found on element.
33    #[error("attribute '{name}' not found on element")]
34    AttributeNotFound {
35        /// The attribute name that was not found.
36        name: String,
37    },
38
39    /// I/O error when reading from file or network.
40    #[error("I/O error: {0}")]
41    Io(#[from] std::io::Error),
42
43    /// Streaming parser in invalid state for this operation.
44    #[cfg(feature = "streaming")]
45    #[error("invalid streaming state: {message}")]
46    InvalidStreamingState {
47        /// Description of the invalid state.
48        message: String,
49    },
50
51    /// Handler callback failed during streaming.
52    #[cfg(feature = "streaming")]
53    #[error("handler error: {message}")]
54    HandlerError {
55        /// Description of the handler error.
56        message: String,
57    },
58
59    /// Streaming selector compilation failed.
60    #[cfg(feature = "streaming")]
61    #[error("streaming selector error: {message}")]
62    StreamingSelectorError {
63        /// Description of the selector error.
64        message: String,
65    },
66}
67
68impl Error {
69    /// Creates a new parse error with the given message.
70    #[must_use]
71    pub fn parse(message: impl Into<String>) -> Self {
72        Self::ParseError { message: message.into() }
73    }
74
75    /// Creates a new invalid selector error.
76    #[must_use]
77    pub fn invalid_selector(selector: impl Into<String>) -> Self {
78        Self::InvalidSelector { selector: selector.into() }
79    }
80
81    /// Creates a new not found error.
82    #[must_use]
83    pub fn not_found(query: impl Into<String>) -> Self {
84        Self::NotFound { query: query.into() }
85    }
86
87    /// Creates a new attribute not found error.
88    #[must_use]
89    pub fn attribute_not_found(name: impl Into<String>) -> Self {
90        Self::AttributeNotFound { name: name.into() }
91    }
92
93    /// Creates a new invalid streaming state error.
94    #[cfg(feature = "streaming")]
95    #[must_use]
96    pub fn invalid_streaming_state(message: impl Into<String>) -> Self {
97        Self::InvalidStreamingState { message: message.into() }
98    }
99
100    /// Creates a new handler error.
101    #[cfg(feature = "streaming")]
102    #[must_use]
103    pub fn handler_error(message: impl Into<String>) -> Self {
104        Self::HandlerError { message: message.into() }
105    }
106
107    /// Creates a new streaming selector error.
108    #[cfg(feature = "streaming")]
109    #[must_use]
110    pub fn streaming_selector_error(message: impl Into<String>) -> Self {
111        Self::StreamingSelectorError { message: message.into() }
112    }
113}
114
// Source position tracking for error reporting
116
/// A position in source text (1-indexed line and column).
///
/// The type is a plain value (`Copy`) and performs no validation: the
/// 1-indexed convention is the caller's responsibility.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourcePosition {
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed, in characters not bytes).
    pub column: usize,
    /// Byte offset from start of input.
    pub offset: usize,
}

impl SourcePosition {
    /// Creates a new source position from its three coordinates.
    ///
    /// The values are stored exactly as given; usable in `const` contexts.
    #[must_use]
    pub const fn new(line: usize, column: usize, offset: usize) -> Self {
        Self { line, column, offset }
    }
}
135
136/// A span in source text with start and end positions.
137#[derive(Debug, Clone, Copy, PartialEq, Eq)]
138pub struct SourceSpan {
139    /// Start position (inclusive).
140    pub start: SourcePosition,
141    /// End position (exclusive).
142    pub end: SourcePosition,
143}
144
145impl SourceSpan {
146    /// Creates a new source span.
147    #[must_use]
148    pub const fn new(start: SourcePosition, end: SourcePosition) -> Self {
149        Self { start, end }
150    }
151
152    /// Returns the length of the span in bytes.
153    #[must_use]
154    pub const fn len(&self) -> usize {
155        self.end.offset.saturating_sub(self.start.offset)
156    }
157
158    /// Returns true if the span is empty.
159    #[must_use]
160    pub const fn is_empty(&self) -> bool {
161        self.len() == 0
162    }
163}
164
165/// Context around an error for display purposes.
166#[derive(Debug, Clone)]
167pub struct SpanContext {
168    /// The line of source text containing the error.
169    pub line_text: String,
170    /// The line number (1-indexed).
171    pub line_number: usize,
172    /// Column where error starts (1-indexed).
173    pub column_start: usize,
174    /// Column where error ends (1-indexed).
175    pub column_end: usize,
176}
177
178impl SpanContext {
179    /// Creates a context from source text and span.
180    #[must_use]
181    pub fn from_source(source: &str, span: &SourceSpan) -> Self {
182        let lines: Vec<&str> = source.lines().collect();
183        let line_idx = span.start.line.saturating_sub(1);
184        let line_text = lines.get(line_idx).unwrap_or(&"").to_string();
185
186        Self {
187            line_text: line_text.clone(),
188            line_number: span.start.line,
189            column_start: span.start.column,
190            column_end: if span.start.line == span.end.line {
191                span.end.column
192            } else {
193                line_text.chars().count() + 1
194            },
195        }
196    }
197
198    /// Formats the context with error highlighting.
199    ///
200    /// Returns a multi-line string showing the error line with carets (^) indicating
201    /// the error location.
202    #[must_use]
203    pub fn format_highlight(&self) -> String {
204        use std::fmt::Write;
205        let mut result = String::new();
206        let _ = writeln!(result, "{:>4} | {}", self.line_number, self.line_text);
207        let _ = write!(
208            result,
209            "     | {}{}",
210            " ".repeat(self.column_start.saturating_sub(1)),
211            "^".repeat(self.column_end.saturating_sub(self.column_start).max(1))
212        );
213        result
214    }
215}
216
/// Unit tests for the error messages and the source-position helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Each constructor must render the message defined by its `#[error]` attribute.
    #[test]
    fn test_error_display() {
        let err = Error::parse("unexpected end of input");
        assert_eq!(err.to_string(), "failed to parse HTML: unexpected end of input");

        let err = Error::invalid_selector("div[");
        assert_eq!(err.to_string(), "invalid CSS selector: div[");

        let err = Error::not_found("div.missing");
        assert_eq!(err.to_string(), "element not found: div.missing");

        let err = Error::attribute_not_found("href");
        assert_eq!(err.to_string(), "attribute 'href' not found on element");
    }

    /// `SourcePosition::new` stores its arguments verbatim.
    #[test]
    fn test_source_position_creation() {
        let pos = SourcePosition::new(1, 5, 4);
        assert_eq!(pos.line, 1);
        assert_eq!(pos.column, 5);
        assert_eq!(pos.offset, 4);
    }

    /// Span length is the byte-offset distance between the endpoints.
    #[test]
    fn test_source_span_length() {
        let span = SourceSpan::new(SourcePosition::new(1, 1, 0), SourcePosition::new(1, 6, 5));
        assert_eq!(span.len(), 5);
        assert!(!span.is_empty());
    }

    /// A span whose endpoints coincide is empty.
    #[test]
    fn test_source_span_empty() {
        let pos = SourcePosition::new(1, 1, 0);
        let span = SourceSpan::new(pos, pos);
        assert_eq!(span.len(), 0);
        assert!(span.is_empty());
    }

    /// A single-line span picks the right line and highlights the right columns.
    #[test]
    fn test_span_context_formatting() {
        let source = "line1\nline2 error here\nline3";
        let span = SourceSpan::new(SourcePosition::new(2, 7, 12), SourcePosition::new(2, 12, 17));
        let ctx = SpanContext::from_source(source, &span);

        assert_eq!(ctx.line_number, 2);
        assert_eq!(ctx.line_text, "line2 error here");
        assert_eq!(ctx.column_start, 7);
        assert_eq!(ctx.column_end, 12);

        let formatted = ctx.format_highlight();
        assert!(formatted.contains("line2 error here"));
        assert!(formatted.contains("^^^^^"));
    }

    /// A span crossing lines is reported against its first line only.
    #[test]
    fn test_span_context_multiline_span() {
        let source = "line1\nline2 starts here\nline3 continues";
        let span = SourceSpan::new(SourcePosition::new(2, 13, 18), SourcePosition::new(3, 5, 29));
        let ctx = SpanContext::from_source(source, &span);

        assert_eq!(ctx.line_number, 2);
        assert_eq!(ctx.column_start, 13);
        assert!(ctx.column_end > ctx.column_start);
    }

    /// A one-character span produces exactly one caret.
    #[test]
    fn test_span_context_single_char() {
        let source = "hello world";
        let span = SourceSpan::new(SourcePosition::new(1, 7, 6), SourcePosition::new(1, 8, 7));
        let ctx = SpanContext::from_source(source, &span);
        let formatted = ctx.format_highlight();
        assert!(formatted.contains('^'));
        assert!(!formatted.contains("^^"));
    }

    /// A line number past the end of the source yields empty line text, not a panic.
    #[test]
    fn test_span_context_invalid_line() {
        let source = "line1\nline2";
        let span =
            SourceSpan::new(SourcePosition::new(10, 1, 100), SourcePosition::new(10, 5, 104));
        let ctx = SpanContext::from_source(source, &span);

        assert_eq!(ctx.line_text, "");
        assert_eq!(ctx.line_number, 10);
    }
}