serde_saphyr/
de_error.rs

//! Defines the deserialization error type and the source location attached to it.
2use std::fmt;
3
4use crate::budget::BudgetBreach;
5use crate::parse_scalars::{
6    parse_int_signed, parse_yaml11_bool, parse_yaml12_float, scalar_is_nullish,
7};
8use crate::tags::SfTag;
9use saphyr_parser::{ScalarStyle, ScanError, Span};
10use serde::de::{self};
11
/// Row/column location within the source YAML document (1-indexed).
///
/// The pair `(0, 0)` is reserved for [`Location::UNKNOWN`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Location {
    /// 1-indexed row number in the input stream.
    pub(crate) row: u32,
    /// 1-indexed column number in the input stream.
    pub(crate) column: u32,
}

impl Location {
    /// Sentinel value meaning "location unknown".
    ///
    /// Used when a precise position is not yet available at error creation time.
    pub const UNKNOWN: Self = Self { row: 0, column: 0 };

    /// Create a new location record.
    ///
    /// Arguments:
    /// - `row`: 1-indexed row.
    /// - `column`: 1-indexed column.
    ///
    /// Returns:
    /// - `Location` with the provided coordinates. Coordinates that do not fit
    ///   into `u32` are clamped to `u32::MAX` instead of wrapping around.
    ///
    /// Called by:
    /// - Parser/scan adapters that convert upstream spans to `Location`.
    pub(crate) const fn new(row: usize, column: usize) -> Self {
        // Positions are stored as `u32` because they are used for error
        // reporting only. Saturating (rather than truncating) guarantees that
        // an oversized coordinate can never wrap to 0 and thereby be mistaken
        // for `Location::UNKNOWN`.
        Self {
            row: Self::saturate_to_u32(row),
            column: Self::saturate_to_u32(column),
        }
    }

    /// serde_yaml-compatible line information
    pub fn line(&self) -> u64 {
        u64::from(self.row)
    }

    /// serde_yaml-compatible column information
    pub fn column(&self) -> u64 {
        u64::from(self.column)
    }

    /// Narrow a `usize` to `u32`, clamping at `u32::MAX`.
    ///
    /// Written with a plain comparison because `u32::try_from` cannot be
    /// called from a `const fn`.
    const fn saturate_to_u32(value: usize) -> u32 {
        if value > u32::MAX as usize {
            u32::MAX
        } else {
            // Lossless: the branch above excluded everything above `u32::MAX`.
            value as u32
        }
    }
}
59
60/// Convert a `saphyr_parser::Span` to a 1-indexed `Location`.
61///
62/// Called by:
63/// - The live events adapter for each raw parser event.
64pub(crate) fn location_from_span(span: &Span) -> Location {
65    let start = &span.start;
66    Location::new(start.line(), start.col() + 1)
67}
68
69/// Error type compatible with `serde::de::Error`.
70#[derive(Debug)]
71pub enum Error {
72    /// Free-form error with optional source location.
73    Message {
74        msg: String,
75        location: Location,
76    },
77    /// Unexpected end of input.
78    Eof {
79        location: Location,
80    },
81    /// Structural/type mismatch — something else than the expected token/value was seen.
82    Unexpected {
83        expected: &'static str,
84        location: Location,
85    },
86    ContainerEndMismatch {
87        location: Location,
88    },
89    /// Alias references a non-existent anchor id.
90    UnknownAnchor {
91        id: usize,
92        location: Location,
93    },
94    /// Error when parsing robotic and other extensions beyond standard YAML.
95    /// (error in extension hook).
96    HookError {
97        msg: String,
98        location: Location,
99    },
100    /// A YAML budget limit was exceeded.
101    Budget {
102        breach: BudgetBreach,
103        location: Location,
104    },
105    /// Unexpected I/O error. This may happen only when deserializing from a reader.
106    IOError {
107        cause: std::io::Error,
108    },
109    /// The value is targeted to the string field but can be interpreted as a number or boolean.
110    /// This error can only happens if no_schema set true.
111    QuotingRequired {
112        value: String, // sanitized (checked) value that must be quoted
113        location: Location,
114    },
115}
116
117impl Error {
118    /// Construct a `Message` error with no known location.
119    ///
120    /// Arguments:
121    /// - `s`: human-readable message.
122    ///
123    /// Returns:
124    /// - `Error::Message` pointing at [`Location::UNKNOWN`].
125    ///
126    /// Called by:
127    /// - Scalar parsers and helpers throughout this module.
128    pub(crate) fn msg<S: Into<String>>(s: S) -> Self {
129        Error::Message {
130            msg: s.into(),
131            location: Location::UNKNOWN,
132        }
133    }
134
135    /// Construct a `QuotingRequired` error with no known location.
136    /// Called by:
137    /// - Deserializer, when deserializing into string if no_schema set to true.
138    pub(crate) fn quoting_required(value: &str) -> Self {
139        // Ensure the value really is like number or boolean (do not reflect back content
140        // that may be used for attack)
141        let location = Location::UNKNOWN;
142        let value = if parse_yaml12_float::<f64>(value, location, SfTag::None, false).is_ok()
143            || parse_int_signed::<i128>(value, "i128", location, false).is_ok()
144            || parse_yaml11_bool(value).is_ok()
145            || scalar_is_nullish(value, &ScalarStyle::Plain)
146        {
147            value.to_string()
148        } else {
149            String::new()
150        };
151        Error::QuotingRequired { value, location }
152    }
153
154    /// Convenience for an `Unexpected` error pre-filled with a human phrase.
155    ///
156    /// Arguments:
157    /// - `what`: short description like "sequence start".
158    ///
159    /// Returns:
160    /// - `Error::Unexpected` at unknown location.
161    ///
162    /// Called by:
163    /// - Deserializer methods that validate the next event kind.
164    pub(crate) fn unexpected(what: &'static str) -> Self {
165        Error::Unexpected {
166            expected: what,
167            location: Location::UNKNOWN,
168        }
169    }
170
171    /// Construct an unexpected end-of-input error with unknown location.
172    ///
173    /// Used by:
174    /// - Lookahead and pull methods when `None` appears prematurely.
175    pub(crate) fn eof() -> Self {
176        Error::Eof {
177            location: Location::UNKNOWN,
178        }
179    }
180
181    /// Construct an `UnknownAnchor` error for the given anchor id (unknown location).
182    ///
183    /// Called by:
184    /// - Alias replay logic in the live event source.
185    pub(crate) fn unknown_anchor(id: usize) -> Self {
186        Error::UnknownAnchor {
187            id,
188            location: Location::UNKNOWN,
189        }
190    }
191
192    /// Attach/override a concrete location to this error and return it.
193    ///
194    /// Arguments:
195    /// - `set_location`: location to store in the error.
196    ///
197    /// Returns:
198    /// - The same `Error` with location updated.
199    ///
200    /// Called by:
201    /// - Most error paths once the event position becomes known.
202    pub(crate) fn with_location(mut self, set_location: Location) -> Self {
203        match &mut self {
204            Error::Message { location, .. }
205            | Error::Eof { location }
206            | Error::Unexpected { location, .. }
207            | Error::HookError { location, .. }
208            | Error::ContainerEndMismatch { location, .. }
209            | Error::UnknownAnchor { location, .. }
210            | Error::QuotingRequired { location, .. }
211            | Error::Budget { location, .. } => {
212                *location = set_location;
213            }
214            Error::IOError { .. } => {} // this error does not support location
215        }
216        self
217    }
218
219    /// If the error has a known location, return it.
220    ///
221    /// Returns:
222    /// - `Some(Location)` when coordinates are known; `None` otherwise.
223    ///
224    /// Used by:
225    /// - Callers that want to surface precise positions to users.
226    pub fn location(&self) -> Option<Location> {
227        match self {
228            Error::Message { location, .. }
229            | Error::Eof { location }
230            | Error::Unexpected { location, .. }
231            | Error::HookError { location, .. }
232            | Error::ContainerEndMismatch { location, .. }
233            | Error::UnknownAnchor { location, .. }
234            | Error::QuotingRequired { location, .. }
235            | Error::Budget { location, .. } => {
236                if location != &Location::UNKNOWN {
237                    Some(*location)
238                } else {
239                    None
240                }
241            }
242            Error::IOError { cause: _ } => None,
243        }
244    }
245
246    /// Map a `saphyr_parser::ScanError` into our error type with location.
247    ///
248    /// Called by:
249    /// - The live events adapter when the underlying parser fails.
250    pub(crate) fn from_scan_error(err: ScanError) -> Self {
251        let mark = err.marker();
252        let location = Location::new(mark.line(), mark.col() + 1);
253        Error::Message {
254            msg: err.info().to_owned(),
255            location,
256        }
257    }
258}
259
260impl fmt::Display for Error {
261    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
262        match self {
263            Error::Message { msg, location } => fmt_with_location(f, msg, location),
264            Error::HookError { msg, location } => fmt_with_location(f, msg, location),
265            Error::Eof { location } => fmt_with_location(f, "unexpected end of input", location),
266            Error::Unexpected { expected, location } => fmt_with_location(
267                f,
268                &format!("unexpected event: expected {expected}"),
269                location,
270            ),
271            Error::ContainerEndMismatch { location } => {
272                fmt_with_location(f, "list or mapping end with no start", location)
273            }
274            Error::UnknownAnchor { id, location } => fmt_with_location(
275                f,
276                &format!("alias references unknown anchor id {id}"),
277                location,
278            ),
279            Error::Budget { breach, location } => {
280                fmt_with_location(f, &format!("YAML budget breached: {breach:?}"), location)
281            }
282            Error::QuotingRequired { value, location } => fmt_with_location(
283                f,
284                &format!("The string value [{value}] must be quoted"),
285                location,
286            ),
287            Error::IOError { cause } => write!(f, "IO error: {}", cause),
288        }
289    }
290}
291impl std::error::Error for Error {}
292impl de::Error for Error {
293    fn custom<T: fmt::Display>(msg: T) -> Self {
294        Error::msg(msg.to_string())
295    }
296}
297
298/// Print a message optionally suffixed with "at line X, column Y".
299///
300/// Arguments:
301/// - `f`: destination formatter.
302/// - `msg`: main text.
303/// - `location`: position to attach if known.
304///
305/// Returns:
306/// - `fmt::Result` as required by `Display`.
307fn fmt_with_location(f: &mut fmt::Formatter<'_>, msg: &str, location: &Location) -> fmt::Result {
308    if location != &Location::UNKNOWN {
309        write!(
310            f,
311            "{msg} at line {}, column {}",
312            location.row, location.column
313        )
314    } else {
315        write!(f, "{msg}")
316    }
317}
318
319/// Convert a budget breach report into a user-facing error.
320///
321/// Arguments:
322/// - `breach`: which limit was exceeded (from the streaming budget checker).
323///
324/// Returns:
325/// - `Error::Message` with a formatted description.
326///
327/// Called by:
328/// - The live events layer when enforcing budgets during/after parsing.
329pub(crate) fn budget_error(breach: BudgetBreach) -> Error {
330    Error::Budget {
331        breach,
332        location: Location::UNKNOWN,
333    }
334}