flowscope_core/
error.rs

1//! Error types for SQL parsing and analysis.
2//!
3//! # Error Handling Strategy
4//!
5//! This crate uses two complementary error handling patterns:
6//!
7//! - [`ParseError`]: Fatal errors that prevent SQL parsing. Returned as `Result<T, ParseError>`
8//!   and stop processing of the affected statement.
9//!
10//! - [`crate::types::Issue`]: Non-fatal warnings and errors collected during analysis
11//!   (e.g., unresolved table references, missing columns). These are accumulated in a
12//!   vector and returned alongside successful analysis results, allowing partial lineage
13//!   extraction even when some references cannot be resolved.
14//!
15//! This separation allows the analyzer to be resilient: parsing must succeed, but
16//! analysis can continue with incomplete information while reporting issues.
17
18use crate::types::Dialect;
19use regex::Regex;
20use std::fmt;
21use std::sync::OnceLock;
22#[cfg(feature = "tracing")]
23use tracing::trace;
24
/// Error encountered during SQL parsing.
///
/// This error preserves structured information from the underlying parser
/// including position information when available.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Human-readable error message.
    pub message: String,
    /// Line/column position where the error occurred, if available.
    pub position: Option<Position>,
    /// The SQL dialect being parsed when the error occurred.
    pub dialect: Option<Dialect>,
    /// The specific category of parse error.
    pub kind: ParseErrorKind,
}
40
/// Position information for a parse error.
///
/// Both coordinates are 1-indexed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed).
    pub column: usize,
}
49
/// Category of parse error for programmatic handling.
///
/// [`ParseErrorKind::SyntaxError`] is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ParseErrorKind {
    /// Unexpected token or character in input.
    #[default]
    SyntaxError,
    /// Missing required clause or keyword.
    MissingClause,
    /// Invalid or unexpected end of input.
    UnexpectedEof,
    /// Feature not supported by the current dialect.
    UnsupportedFeature,
    /// Lexer/tokenization error.
    LexerError,
}
65
66impl ParseError {
67    /// Creates a new parse error with just a message.
68    pub fn new(message: impl Into<String>) -> Self {
69        Self {
70            message: message.into(),
71            position: None,
72            dialect: None,
73            kind: ParseErrorKind::SyntaxError,
74        }
75    }
76
77    /// Creates a parse error with position information.
78    pub fn with_position(message: impl Into<String>, line: usize, column: usize) -> Self {
79        Self {
80            message: message.into(),
81            position: Some(Position { line, column }),
82            dialect: None,
83            kind: ParseErrorKind::SyntaxError,
84        }
85    }
86
87    /// Adds dialect context to the error.
88    pub fn with_dialect(mut self, dialect: Dialect) -> Self {
89        self.dialect = Some(dialect);
90        self
91    }
92
93    /// Sets the error kind.
94    pub fn with_kind(mut self, kind: ParseErrorKind) -> Self {
95        self.kind = kind;
96        self
97    }
98
99    /// Parses position from sqlparser error message format.
100    ///
101    /// sqlparser uses format like "Expected ..., found ... at Line: X, Column: Y"
102    ///
103    /// # Implementation Note
104    ///
105    /// This parsing is coupled to the `sqlparser` crate's error message format.
106    /// Uses regex for robust parsing that handles various whitespace and formatting
107    /// variations. Gracefully returns `None` when the expected format is not found.
108    fn parse_position_from_message(message: &str) -> Option<Position> {
109        // Use a static regex for performance - compiled once on first use
110        static POSITION_REGEX: OnceLock<Regex> = OnceLock::new();
111        let re = POSITION_REGEX.get_or_init(|| {
112            // Match "Line: <number>" followed by optional comma/whitespace, then "Column: <number>"
113            // Handles variations like "Line: 1, Column: 5" or "Line:1,Column:5"
114            Regex::new(r"Line:\s*(\d+)\s*,\s*Column:\s*(\d+)").expect("Invalid regex pattern")
115        });
116
117        let result = re.captures(message).and_then(|caps| {
118            let line: usize = caps.get(1)?.as_str().parse().ok()?;
119            let column: usize = caps.get(2)?.as_str().parse().ok()?;
120            Some(Position { line, column })
121        });
122
123        #[cfg(feature = "tracing")]
124        if result.is_none() && (message.contains("Line") || message.contains("Column")) {
125            trace!(
126                "Failed to parse position from error message that appears to contain position info: {}",
127                message
128            );
129        }
130
131        result
132    }
133
134    /// Determines the error kind from the message content.
135    ///
136    /// # Implementation Note
137    ///
138    /// Like [`Self::parse_position_from_message`], this function relies on patterns
139    /// in `sqlparser` error messages and may need updates if those messages change.
140    fn infer_kind_from_message(message: &str) -> ParseErrorKind {
141        let lower = message.to_lowercase();
142        if lower.contains("unexpected end") || lower.contains("eof") {
143            ParseErrorKind::UnexpectedEof
144        } else if lower.contains("expected") {
145            ParseErrorKind::MissingClause
146        } else if lower.contains("not supported") || lower.contains("unsupported") {
147            ParseErrorKind::UnsupportedFeature
148        } else if lower.contains("lexer") || lower.contains("token") {
149            ParseErrorKind::LexerError
150        } else {
151            ParseErrorKind::SyntaxError
152        }
153    }
154}
155
156impl fmt::Display for ParseError {
157    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158        write!(f, "Parse error")?;
159
160        if let Some(dialect) = self.dialect {
161            write!(f, " ({dialect:?})")?;
162        }
163
164        if let Some(pos) = self.position {
165            write!(f, " at line {}, column {}", pos.line, pos.column)?;
166        }
167
168        write!(f, ": {}", self.message)
169    }
170}
171
// Empty impl: `ParseError` uses the trait's default methods and builds on its
// `Debug` and `Display` implementations.
impl std::error::Error for ParseError {}
173
174impl From<sqlparser::parser::ParserError> for ParseError {
175    fn from(err: sqlparser::parser::ParserError) -> Self {
176        let message = err.to_string();
177        let position = Self::parse_position_from_message(&message);
178        let kind = Self::infer_kind_from_message(&message);
179
180        Self {
181            message,
182            position,
183            dialect: None,
184            kind,
185        }
186    }
187}
188
/// Unit tests for position parsing, kind inference, display formatting and
/// conversion from `sqlparser` errors.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_position_from_message() {
        let msg = "Expected SELECT, found 'INSERT' at Line: 1, Column: 5";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, Some(Position { line: 1, column: 5 }));
    }

    #[test]
    fn test_parse_position_no_position() {
        let msg = "Unexpected token";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_no_whitespace() {
        let msg = "Error at Line:1,Column:5";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, Some(Position { line: 1, column: 5 }));
    }

    #[test]
    fn test_parse_position_extra_whitespace() {
        let msg = "Error at Line:  42 ,  Column:   99";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(
            pos,
            Some(Position {
                line: 42,
                column: 99
            })
        );
    }

    #[test]
    fn test_parse_position_large_numbers() {
        let msg = "Error at Line: 99999, Column: 88888";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(
            pos,
            Some(Position {
                line: 99999,
                column: 88888
            })
        );
    }

    #[test]
    fn test_parse_position_malformed_non_numeric_line() {
        let msg = "Error at Line: abc, Column: 5";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_malformed_non_numeric_column() {
        let msg = "Error at Line: 1, Column: xyz";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_malformed_empty_values() {
        let msg = "Error at Line: , Column: ";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_partial_line_only() {
        let msg = "Error at Line: 5";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_partial_column_only() {
        let msg = "Error at Column: 5";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_reversed_order() {
        // If format changes to Column first, it should fail gracefully
        let msg = "Error at Column: 5, Line: 1";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_parse_position_negative_numbers() {
        // Negative numbers shouldn't match (regex only matches digits)
        let msg = "Error at Line: -1, Column: -5";
        let pos = ParseError::parse_position_from_message(msg);
        assert_eq!(pos, None);
    }

    #[test]
    fn test_infer_kind_eof() {
        let kind = ParseError::infer_kind_from_message("Unexpected end of input");
        assert_eq!(kind, ParseErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_infer_kind_expected() {
        let kind = ParseError::infer_kind_from_message("Expected SELECT keyword");
        assert_eq!(kind, ParseErrorKind::MissingClause);
    }

    #[test]
    fn test_infer_kind_unsupported() {
        let kind = ParseError::infer_kind_from_message("Feature not supported");
        assert_eq!(kind, ParseErrorKind::UnsupportedFeature);

        let kind = ParseError::infer_kind_from_message("This is unsupported");
        assert_eq!(kind, ParseErrorKind::UnsupportedFeature);
    }

    #[test]
    fn test_infer_kind_lexer() {
        let kind = ParseError::infer_kind_from_message("Lexer error: invalid character");
        assert_eq!(kind, ParseErrorKind::LexerError);

        let kind = ParseError::infer_kind_from_message("Invalid token at position 5");
        assert_eq!(kind, ParseErrorKind::LexerError);
    }

    #[test]
    fn test_infer_kind_default() {
        let kind = ParseError::infer_kind_from_message("Something went wrong");
        assert_eq!(kind, ParseErrorKind::SyntaxError);
    }

    #[test]
    fn test_display_with_position() {
        let err = ParseError::with_position("Unexpected token", 10, 5);
        assert_eq!(
            err.to_string(),
            "Parse error at line 10, column 5: Unexpected token"
        );
    }

    #[test]
    fn test_display_with_dialect() {
        let err = ParseError::new("Bad syntax").with_dialect(Dialect::Postgres);
        assert_eq!(err.to_string(), "Parse error (Postgres): Bad syntax");
    }

    #[test]
    fn test_display_with_dialect_and_position() {
        let err = ParseError::with_position("Bad syntax", 1, 5).with_dialect(Dialect::Snowflake);
        assert_eq!(
            err.to_string(),
            "Parse error (Snowflake) at line 1, column 5: Bad syntax"
        );
    }

    #[test]
    fn test_from_parser_error() {
        // Build a real sqlparser error so the `From` conversion itself is exercised,
        // rather than only the position-parsing helper.
        let source = sqlparser::parser::ParserError::ParserError(
            "Expected expression, found EOF at Line: 3, Column: 12".to_string(),
        );
        let err = ParseError::from(source);

        // The rendered message may carry a prefix added by sqlparser, so only
        // check that the original text survives.
        assert!(err
            .message
            .contains("Expected expression, found EOF at Line: 3, Column: 12"));
        assert_eq!(
            err.position,
            Some(Position {
                line: 3,
                column: 12
            })
        );
        assert_eq!(err.kind, ParseErrorKind::UnexpectedEof);
        assert_eq!(err.dialect, None);
    }

    #[test]
    fn test_with_kind_builder() {
        let err = ParseError::new("Error")
            .with_kind(ParseErrorKind::UnexpectedEof)
            .with_dialect(Dialect::Postgres);
        assert_eq!(err.kind, ParseErrorKind::UnexpectedEof);
        assert_eq!(err.dialect, Some(Dialect::Postgres));
    }

    #[test]
    fn test_error_trait() {
        let err = ParseError::new("Test error");
        let _: &dyn std::error::Error = &err;
    }
}