Skip to main content

scrape_core/parser/
error.rs

1//! Parser error types.
2
3use thiserror::Error;
4
5use crate::error::{SourceSpan, SpanContext};
6
7/// Result type for parser operations.
8pub type ParseResult<T> = Result<T, ParseError>;
9
10/// Errors that can occur during HTML parsing.
11#[derive(Debug, Error)]
12pub enum ParseError {
13    /// Document exceeds maximum nesting depth.
14    #[error("maximum nesting depth of {max_depth} exceeded{}", format_position(span.as_ref()))]
15    MaxDepthExceeded {
16        /// Configured maximum depth.
17        max_depth: usize,
18        /// Source location, if available.
19        span: Option<SourceSpan>,
20    },
21
22    /// Input is empty or contains only whitespace.
23    #[error("empty or whitespace-only input")]
24    EmptyInput,
25
26    /// Encoding error in input.
27    #[error("encoding error: {message}")]
28    EncodingError {
29        /// Description of the encoding problem.
30        message: String,
31    },
32
33    /// Malformed HTML construct.
34    #[error("malformed HTML: {message}{}", format_position(span.as_ref()))]
35    MalformedHtml {
36        /// Description of the malformation.
37        message: String,
38        /// Source location, if available.
39        span: Option<SourceSpan>,
40    },
41
42    /// Internal parser error.
43    #[error("internal parser error: {0}")]
44    InternalError(String),
45}
46
47fn format_position(span: Option<&SourceSpan>) -> String {
48    span.map_or_else(String::new, |s| {
49        format!(" at line {}, column {}", s.start.line, s.start.column)
50    })
51}
52
53impl ParseError {
54    /// Returns the source span associated with this error, if any.
55    #[must_use]
56    pub fn span(&self) -> Option<&SourceSpan> {
57        match self {
58            Self::MaxDepthExceeded { span, .. } | Self::MalformedHtml { span, .. } => span.as_ref(),
59            _ => None,
60        }
61    }
62
63    /// Returns the line number of the error (1-indexed), if available.
64    #[must_use]
65    pub fn line(&self) -> Option<usize> {
66        self.span().map(|s| s.start.line)
67    }
68
69    /// Returns the column number of the error (1-indexed), if available.
70    #[must_use]
71    pub fn column(&self) -> Option<usize> {
72        self.span().map(|s| s.start.column)
73    }
74
75    /// Returns the span context for display, given the source text.
76    #[must_use]
77    pub fn span_context(&self, source: &str) -> Option<SpanContext> {
78        self.span().map(|s| SpanContext::from_source(source, s))
79    }
80}
81
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::SourcePosition;

    #[test]
    fn test_max_depth_exceeded_display() {
        let err = ParseError::MaxDepthExceeded { max_depth: 512, span: None };
        assert_eq!(err.to_string(), "maximum nesting depth of 512 exceeded");
    }

    #[test]
    fn test_max_depth_exceeded_with_position() {
        let span =
            SourceSpan::new(SourcePosition::new(10, 5, 100), SourcePosition::new(10, 10, 105));
        let err = ParseError::MaxDepthExceeded { max_depth: 512, span: Some(span) };
        assert_eq!(err.to_string(), "maximum nesting depth of 512 exceeded at line 10, column 5");
        assert_eq!(err.line(), Some(10));
        assert_eq!(err.column(), Some(5));
    }

    #[test]
    fn test_empty_input_display() {
        let err = ParseError::EmptyInput;
        assert_eq!(err.to_string(), "empty or whitespace-only input");
    }

    #[test]
    fn test_encoding_error_display() {
        let err = ParseError::EncodingError { message: "invalid UTF-8 sequence".into() };
        assert_eq!(err.to_string(), "encoding error: invalid UTF-8 sequence");
    }

    #[test]
    fn test_internal_error_display() {
        let err = ParseError::InternalError("unexpected state".into());
        assert_eq!(err.to_string(), "internal parser error: unexpected state");
    }

    #[test]
    fn test_malformed_html_with_span() {
        let span = SourceSpan::new(SourcePosition::new(2, 7, 12), SourcePosition::new(2, 12, 17));
        let err = ParseError::MalformedHtml { message: "unclosed tag".into(), span: Some(span) };
        assert_eq!(err.to_string(), "malformed HTML: unclosed tag at line 2, column 7");
        assert_eq!(err.line(), Some(2));
        assert_eq!(err.column(), Some(7));
    }

    #[test]
    fn test_malformed_html_without_span() {
        // Without a span the message must carry no position suffix.
        let err = ParseError::MalformedHtml { message: "unclosed tag".into(), span: None };
        assert_eq!(err.to_string(), "malformed HTML: unclosed tag");
        assert_eq!(err.line(), None);
        assert_eq!(err.column(), None);
    }

    #[test]
    fn test_span_context() {
        let source = "line1\nline2 error here\nline3";
        let span = SourceSpan::new(SourcePosition::new(2, 7, 12), SourcePosition::new(2, 12, 17));
        let err = ParseError::MalformedHtml { message: "test".into(), span: Some(span) };

        // `expect` replaces the `is_some()` + `unwrap()` pair and still fails
        // the test with a clear message when no context is produced.
        let ctx = err.span_context(source).expect("error with a span must yield a span context");
        assert_eq!(ctx.line_number, 2);
        assert_eq!(ctx.line_text, "line2 error here");
    }

    #[test]
    fn test_error_without_span() {
        let err = ParseError::EmptyInput;
        assert_eq!(err.line(), None);
        assert_eq!(err.column(), None);
        assert_eq!(err.span(), None);
    }

    #[test]
    fn test_spanless_variants_have_no_span_context() {
        // Variants that cannot carry a span never produce position data.
        let errors = [
            ParseError::EmptyInput,
            ParseError::EncodingError { message: "invalid UTF-8 sequence".into() },
            ParseError::InternalError("unexpected state".into()),
        ];
        for err in &errors {
            assert_eq!(err.span(), None);
            assert_eq!(err.line(), None);
            assert_eq!(err.column(), None);
            assert!(err.span_context("<html></html>").is_none());
        }
    }
}