Skip to main content

saneyaml/
error.rs

1//! Error and source-location types shared by the parser, emitter, and Serde API.
2//!
3//! ```rust
4//! let input = "key: [unterminated\n";
5//! let error = saneyaml::parse_str(input).unwrap_err();
6//! assert!(error.location().is_some());
7//! assert!(error.render_source(input).to_string().contains('^'));
8//! ```
9
10use std::fmt;
11
/// Broad, stable category for a YAML error.
///
/// Every [`Error`] carries exactly one category, readable through
/// [`Error::category`]. The enum is `#[non_exhaustive]`, so code outside this
/// crate must include a wildcard arm when matching on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ErrorCategory {
    /// YAML syntax or structural parse failure.
    Syntax,
    /// Input bytes are not valid for the requested text encoding.
    Encoding,
    /// Reader or writer I/O failure.
    Io,
    /// Configured input, nesting, or expansion limit was exceeded.
    Limit,
    /// Anchor, alias, or other reference resolution failure.
    Reference,
    /// Duplicate mapping key failure.
    DuplicateKey,
    /// Serde data-model or typed value mismatch.
    Data,
    /// Requested operation is outside the implemented YAML surface.
    Unsupported,
    /// Source-preserving lossless edit or graph failure.
    Lossless,
    /// Error category was not classified more narrowly.
    ///
    /// This is the category assigned by [`Error::new`] and
    /// [`Error::with_related`].
    Other,
}
37
/// A byte span plus one-based line and column for a YAML source location.
///
/// The derived [`Default`] value has every field set to `0`. Because real
/// lines and columns are one-based, a zero line or column is how this module
/// recognises an error that has no source location.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Span {
    /// Zero-based byte offset where the span starts.
    pub start: usize,
    /// Zero-based byte offset where the span ends.
    ///
    /// A zero-width span (see [`Span::point`]) has `end == start`.
    pub end: usize,
    /// One-based source line for the start of the span.
    pub line: usize,
    /// One-based UTF-8 byte column for the start of the span (counts bytes, not characters).
    pub column: usize,
}
50
/// A compact source location returned by [`Error::location`].
///
/// Holds the byte index together with the one-based line and column of a
/// single point in the source. Fields are private; use the accessors.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Location {
    index: usize,
    line: usize,
    column: usize,
}

impl Location {
    /// Builds a location from a zero-based byte index and a one-based
    /// line/column pair. The values are stored as given, without validation.
    pub fn new(index: usize, line: usize, column: usize) -> Self {
        Location { index, line, column }
    }

    /// Zero-based byte index of the location.
    pub fn index(&self) -> usize {
        let Location { index, .. } = *self;
        index
    }

    /// One-based source line of the location.
    pub fn line(&self) -> usize {
        let Location { line, .. } = *self;
        line
    }

    /// One-based UTF-8 byte column of the location (bytes, not characters).
    pub fn column(&self) -> usize {
        let Location { column, .. } = *self;
        column
    }
}
84
85impl Span {
86    /// Creates a span from byte bounds and a one-based start line/column.
87    pub fn new(start: usize, end: usize, line: usize, column: usize) -> Self {
88        Self {
89            start,
90            end,
91            line,
92            column,
93        }
94    }
95
96    /// Creates a zero-width span at the given byte offset and line/column.
97    pub fn point(offset: usize, line: usize, column: usize) -> Self {
98        Self::new(offset, offset, line, column)
99    }
100}
101
102pub(crate) fn utf8_error_span(input: &[u8], error: std::str::Utf8Error) -> Span {
103    let offset = error.valid_up_to();
104    let mut line = 1usize;
105    let mut column = 1usize;
106    for byte in &input[..offset] {
107        if *byte == b'\n' {
108            line += 1;
109            column = 1;
110        } else {
111            column += 1;
112        }
113    }
114    Span::point(offset, line, column)
115}
116
/// Additional source context associated with a primary diagnostic.
///
/// Stored in [`Diagnostic::related`]. When a diagnostic is rendered with
/// source context, each related entry is printed after the primary one as its
/// message followed by its own source excerpt.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RelatedDiagnostic {
    /// Message for the related source location.
    pub message: String,
    /// Span for the related source location.
    pub span: Span,
}
125
126/// Path to a value inside a YAML document.
127#[derive(Clone, Debug, Default, PartialEq, Eq)]
128pub struct ErrorPath {
129    segments: Vec<ErrorPathSegment>,
130}
131
132impl ErrorPath {
133    /// Creates an empty path.
134    pub fn new() -> Self {
135        Self {
136            segments: Vec::new(),
137        }
138    }
139
140    /// Creates a path from ordered path segments.
141    pub fn from_segments(segments: Vec<ErrorPathSegment>) -> Self {
142        Self { segments }
143    }
144
145    /// Returns the ordered path segments.
146    pub fn segments(&self) -> &[ErrorPathSegment] {
147        &self.segments
148    }
149
150    /// Returns true when this path has no segments.
151    pub fn is_empty(&self) -> bool {
152        self.segments.is_empty()
153    }
154
155    pub(crate) fn prepend(&mut self, segment: ErrorPathSegment) {
156        self.segments.insert(0, segment);
157    }
158}
159
160impl fmt::Display for ErrorPath {
161    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
162        for (index, segment) in self.segments.iter().enumerate() {
163            match segment {
164                ErrorPathSegment::Field(field) | ErrorPathSegment::Key(field)
165                    if is_plain_path_key(field) =>
166                {
167                    if index > 0 {
168                        f.write_str(".")?;
169                    }
170                    f.write_str(field)?;
171                }
172                ErrorPathSegment::Field(field) | ErrorPathSegment::Key(field) => {
173                    write!(f, "[\"{}\"]", EscapedPathString(field))?;
174                }
175                ErrorPathSegment::Index(index) => write!(f, "[{index}]")?,
176                ErrorPathSegment::ScalarKey(key) => write!(f, "[{key}]")?,
177                ErrorPathSegment::ComplexKey => f.write_str("[{complex key}]")?,
178            }
179        }
180        Ok(())
181    }
182}
183
/// One segment of an [`ErrorPath`].
///
/// `Field` and `Key` are rendered identically by the path's `Display`
/// implementation: bare when the name is a plain identifier, otherwise as a
/// quoted `["..."]` subscript. The enum is `#[non_exhaustive]`, so code
/// outside this crate must include a wildcard arm when matching on it.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ErrorPathSegment {
    /// Named Serde struct field.
    Field(String),
    /// String-like YAML mapping key.
    Key(String),
    /// Zero-based YAML sequence index.
    Index(usize),
    /// Scalar YAML mapping key rendered as diagnostic text.
    ///
    /// The text is written verbatim inside `[...]` when the path is displayed.
    ScalarKey(String),
    /// Complex YAML mapping key that has no compact scalar diagnostic form.
    ComplexKey,
}
199
/// Reports whether `value` can be written bare in a rendered path.
///
/// A plain key is non-empty, starts with an ASCII letter or `_`, and
/// continues with ASCII letters, digits, `_`, or `-` only. Anything else
/// (including any non-ASCII text) must be quoted by the caller.
fn is_plain_path_key(value: &str) -> bool {
    // Every accepted character is ASCII, so inspecting bytes is equivalent to
    // inspecting chars: bytes of multi-byte sequences are never ASCII.
    match value.as_bytes().split_first() {
        None => false,
        Some((head, tail)) => {
            let head_ok = head.is_ascii_alphabetic() || *head == b'_';
            head_ok
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || matches!(*byte, b'_' | b'-'))
        }
    }
}
208
/// Display adapter that escapes a string for use inside a double-quoted
/// path subscript.
struct EscapedPathString<'a>(&'a str);

impl fmt::Display for EscapedPathString<'_> {
    /// Writes the wrapped string with backslash, double quote, newline,
    /// carriage return and tab as two-character escapes, any other control
    /// character as `\u` plus at least four uppercase hex digits, and every
    /// remaining character unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.chars().try_for_each(|ch| match ch {
            '\\' => f.write_str("\\\\"),
            '"' => f.write_str("\\\""),
            '\n' => f.write_str("\\n"),
            '\r' => f.write_str("\\r"),
            '\t' => f.write_str("\\t"),
            control if control.is_control() => write!(f, "\\u{:04X}", control as u32),
            plain => {
                let mut utf8 = [0u8; 4];
                f.write_str(plain.encode_utf8(&mut utf8))
            }
        })
    }
}
227
/// Structured diagnostic payload for a YAML error.
///
/// This is the data behind an [`Error`], reachable through
/// [`Error::diagnostic`]. All fields are public so callers can build their
/// own presentation.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Diagnostic {
    /// Primary diagnostic message.
    pub message: String,
    /// Primary source span.
    ///
    /// Equal to [`Span::default`] (line and column `0`) when the error has no
    /// source location.
    pub span: Span,
    /// Related diagnostics, such as the first occurrence of a duplicate key.
    pub related: Vec<RelatedDiagnostic>,
    /// Broad diagnostic category.
    pub category: ErrorCategory,
    /// Optional YAML document index for stream diagnostics.
    pub document_index: Option<usize>,
    /// Optional path to the in-document value.
    pub path: Option<ErrorPath>,
}
244
/// Error type returned by all public YAML APIs.
///
/// The full [`Diagnostic`] payload is kept behind a `Box`, so the `Error`
/// value itself is a single pointer regardless of how much detail the
/// diagnostic carries.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
    // Boxed payload; read it through `Error::diagnostic` and the accessors.
    diagnostic: Box<Diagnostic>,
}

/// Result alias used by this crate.
///
/// Shorthand for `std::result::Result<T, Error>` with this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
253
254impl Error {
255    /// Creates an error with an optional primary span.
256    pub fn new(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
257        Self::with_category(message, span, ErrorCategory::Other)
258    }
259
260    /// Creates an error with an explicit category.
261    pub fn with_category(
262        message: impl Into<String>,
263        span: impl Into<Option<Span>>,
264        category: ErrorCategory,
265    ) -> Self {
266        Self {
267            diagnostic: Box::new(Diagnostic {
268                message: message.into(),
269                span: span.into().unwrap_or_default(),
270                related: Vec::new(),
271                category,
272                document_index: None,
273                path: None,
274            }),
275        }
276    }
277
278    pub(crate) fn data(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
279        Self::with_category(message, span, ErrorCategory::Data)
280    }
281
282    pub(crate) fn syntax(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
283        Self::with_category(message, span, ErrorCategory::Syntax)
284    }
285
286    pub(crate) fn encoding(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
287        Self::with_category(message, span, ErrorCategory::Encoding)
288    }
289
290    pub(crate) fn io(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
291        Self::with_category(message, span, ErrorCategory::Io)
292    }
293
294    pub(crate) fn limit(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
295        Self::with_category(message, span, ErrorCategory::Limit)
296    }
297
298    pub(crate) fn reference(message: impl Into<String>, span: impl Into<Option<Span>>) -> Self {
299        Self::with_category(message, span, ErrorCategory::Reference)
300    }
301
302    /// Creates an error with one related diagnostic.
303    pub fn with_related(
304        message: impl Into<String>,
305        span: Span,
306        related_message: impl Into<String>,
307        related_span: Span,
308    ) -> Self {
309        Self {
310            diagnostic: Box::new(Diagnostic {
311                message: message.into(),
312                span,
313                related: vec![RelatedDiagnostic {
314                    message: related_message.into(),
315                    span: related_span,
316                }],
317                category: ErrorCategory::Other,
318                document_index: None,
319                path: None,
320            }),
321        }
322    }
323
324    pub(crate) fn with_related_category(
325        message: impl Into<String>,
326        span: Span,
327        related_message: impl Into<String>,
328        related_span: Span,
329        category: ErrorCategory,
330    ) -> Self {
331        let mut error = Self::with_related(message, span, related_message, related_span);
332        error.diagnostic.category = category;
333        error
334    }
335
336    /// Returns the structured diagnostic payload.
337    pub fn diagnostic(&self) -> &Diagnostic {
338        &self.diagnostic
339    }
340
341    /// Returns the primary span, or [`Span::default`] for spanless errors.
342    pub fn span(&self) -> Span {
343        self.diagnostic.span
344    }
345
346    /// Returns the broad diagnostic category.
347    pub fn category(&self) -> ErrorCategory {
348        self.diagnostic.category
349    }
350
351    /// Returns the zero-based document index for stream diagnostics.
352    pub fn document_index(&self) -> Option<usize> {
353        self.diagnostic.document_index
354    }
355
356    /// Returns the in-document path for Serde diagnostics.
357    pub fn path(&self) -> Option<&ErrorPath> {
358        self.diagnostic.path.as_ref()
359    }
360
361    /// Returns the primary location when the error has a nonzero line/column.
362    pub fn location(&self) -> Option<Location> {
363        let span = self.span();
364        (span.line > 0 && span.column > 0).then_some(Location::new(
365            span.start,
366            span.line,
367            span.column,
368        ))
369    }
370
371    /// Returns the one-based line of the primary diagnostic, if available.
372    pub fn line(&self) -> Option<usize> {
373        self.location().map(|location| location.line())
374    }
375
376    /// Returns the one-based column of the primary diagnostic, if available.
377    pub fn column(&self) -> Option<usize> {
378        self.location().map(|location| location.column())
379    }
380
381    /// Renders this error with source context and caret markers.
382    pub fn render_source<'a>(&'a self, source: &'a str) -> SourceDiagnostic<'a> {
383        self.render_source_with_options(source, SourceRenderOptions::default())
384    }
385
386    /// Renders this error with source context and custom options.
387    pub fn render_source_with_options<'a>(
388        &'a self,
389        source: &'a str,
390        options: SourceRenderOptions,
391    ) -> SourceDiagnostic<'a> {
392        self.diagnostic.render_source_with_options(source, options)
393    }
394
395    pub(crate) fn with_span_if_missing(mut self, span: Span) -> Self {
396        if self.location().is_none() {
397            self.diagnostic.span = span;
398        }
399        self
400    }
401
402    pub(crate) fn with_document_index(mut self, index: usize) -> Self {
403        if index > 0 {
404            self.diagnostic.document_index.get_or_insert(index);
405        }
406        self
407    }
408
409    pub(crate) fn with_path_segment_if_empty(mut self, segment: ErrorPathSegment) -> Self {
410        if self
411            .diagnostic
412            .path
413            .as_ref()
414            .is_none_or(ErrorPath::is_empty)
415        {
416            self.diagnostic.path = Some(ErrorPath::from_segments(vec![segment]));
417        }
418        self
419    }
420
421    pub(crate) fn prepend_path_segment(mut self, segment: ErrorPathSegment) -> Self {
422        match &mut self.diagnostic.path {
423            Some(path) => path.prepend(segment),
424            None => self.diagnostic.path = Some(ErrorPath::from_segments(vec![segment])),
425        }
426        self
427    }
428}
429
430impl Diagnostic {
431    /// Returns the broad diagnostic category.
432    pub fn category(&self) -> ErrorCategory {
433        self.category
434    }
435
436    /// Returns the zero-based document index for stream diagnostics.
437    pub fn document_index(&self) -> Option<usize> {
438        self.document_index
439    }
440
441    /// Returns the in-document path for Serde diagnostics.
442    pub fn path(&self) -> Option<&ErrorPath> {
443        self.path.as_ref()
444    }
445
446    /// Renders this diagnostic with source context and caret markers.
447    pub fn render_source<'a>(&'a self, source: &'a str) -> SourceDiagnostic<'a> {
448        self.render_source_with_options(source, SourceRenderOptions::default())
449    }
450
451    /// Renders this diagnostic with source context and custom options.
452    pub fn render_source_with_options<'a>(
453        &'a self,
454        source: &'a str,
455        options: SourceRenderOptions,
456    ) -> SourceDiagnostic<'a> {
457        SourceDiagnostic {
458            diagnostic: self,
459            source,
460            options,
461        }
462    }
463}
464
/// Options for source-context diagnostic rendering.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot use a
/// struct literal: start from [`SourceRenderOptions::default`] and assign the
/// fields that should differ.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct SourceRenderOptions {
    /// Number of source lines to render before and after the primary line.
    ///
    /// The default `0` preserves compact rendering with only the diagnostic
    /// line. Nonzero values include up to that many neighboring source lines on
    /// each side for both primary and related spans.
    pub context_lines: usize,
}
476
/// Display wrapper for explicit source-context diagnostic rendering.
///
/// Created by [`Error::render_source`], [`Diagnostic::render_source`] and
/// their `_with_options` variants. The wrapper only borrows its inputs; the
/// text is produced when the value is formatted with `Display`.
#[derive(Clone, Copy, Debug)]
pub struct SourceDiagnostic<'a> {
    // Diagnostic whose message, spans, path and document index are rendered.
    diagnostic: &'a Diagnostic,
    // Source text the spans are resolved against.
    source: &'a str,
    // Rendering options (number of context lines).
    options: SourceRenderOptions,
}
484
485impl fmt::Display for SourceDiagnostic<'_> {
486    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
487        match diagnostic_location(self.diagnostic) {
488            Some(location) => write!(
489                f,
490                "{} at line {}, column {}",
491                self.diagnostic.message,
492                location.line(),
493                location.column()
494            )?,
495            None => f.write_str(&self.diagnostic.message)?,
496        }
497        if let Some(path) = &self.diagnostic.path
498            && !path.is_empty()
499        {
500            write!(f, "\npath: {path}")?;
501        }
502        if let Some(index) = self.diagnostic.document_index {
503            write!(f, "\ndocument: {index}")?;
504        }
505        render_span_block(f, self.source, self.diagnostic.span, self.options)?;
506        for related in &self.diagnostic.related {
507            write!(f, "\n{}", related.message)?;
508            render_span_block(f, self.source, related.span, self.options)?;
509        }
510        Ok(())
511    }
512}
513
514fn diagnostic_location(diagnostic: &Diagnostic) -> Option<Location> {
515    let span = diagnostic.span;
516    (span.line > 0 && span.column > 0).then_some(Location::new(span.start, span.line, span.column))
517}
518
519fn render_span_block(
520    f: &mut fmt::Formatter<'_>,
521    source: &str,
522    span: Span,
523    options: SourceRenderOptions,
524) -> fmt::Result {
525    if span.line == 0 || span.column == 0 || span.start > source.len() {
526        return Ok(());
527    }
528    let Some((line_start, line_end, _)) = line_bounds(source, span.start) else {
529        return Ok(());
530    };
531    let line_number = span.line;
532    let context_start = line_number.saturating_sub(options.context_lines).max(1);
533    let context_end = line_number.saturating_add(options.context_lines);
534    let width = context_end.to_string().len();
535    writeln!(f)?;
536    writeln!(f, "{:>width$} |", "", width = width)?;
537    // `span.start` is already a char boundary within `[line_start, line_end]`:
538    // `line_bounds` returns `None` for non-boundary offsets and derives both
539    // line bounds from `span.start`, so the clamp here is exact.
540    let caret_start = span.start.clamp(line_start, line_end);
541    let caret_end = floor_char_boundary(source, span.end.clamp(caret_start, line_end));
542    let mut rendered_line = false;
543    for current_line in context_start..=context_end {
544        let Some((current_start, _, line_text)) =
545            line_bounds_for_line(source, current_line, current_line == line_number)
546        else {
547            continue;
548        };
549        if rendered_line {
550            writeln!(f)?;
551        }
552        write!(f, "{current_line:>width$} | {line_text}", width = width)?;
553        if current_line == line_number {
554            writeln!(f)?;
555            write!(f, "{:>width$} | ", "", width = width)?;
556            for byte in source.as_bytes()[current_start..caret_start]
557                .iter()
558                .copied()
559            {
560                if byte == b'\t' {
561                    f.write_str("\t")?;
562                } else {
563                    f.write_str(" ")?;
564                }
565            }
566            let caret_count = caret_end.saturating_sub(caret_start).max(1);
567            for _ in 0..caret_count {
568                f.write_str("^")?;
569            }
570        }
571        rendered_line = true;
572    }
573    Ok(())
574}
575
/// Locates the line of `source` that contains byte `offset`.
///
/// Returns `(line_start, line_end, text)`, where the two offsets delimit the
/// line without its terminating `\n` and `text` is that slice of `source`.
/// An offset sitting on a `\n` belongs to the line that the newline ends.
///
/// Returns `None` when `offset` is past the end of `source` or does not fall
/// on a character boundary.
fn line_bounds(source: &str, offset: usize) -> Option<(usize, usize, &str)> {
    // `is_char_boundary` is already false for offsets beyond `source.len()`.
    if !source.is_char_boundary(offset) {
        return None;
    }
    let (before, after) = source.split_at(offset);
    let line_start = match before.rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    let line_end = match after.find('\n') {
        Some(distance) => offset + distance,
        None => source.len(),
    };
    Some((line_start, line_end, &source[line_start..line_end]))
}
588
/// Locates a line of `source` by its one-based number.
///
/// Returns `(start, end, text)`, where the offsets delimit the line without
/// its terminating `\n`. Line `0` and lines past the end yield `None`.
///
/// When `source` ends with `\n`, the empty line after that final newline is
/// only reported if `include_trailing_empty_line` is set.
fn line_bounds_for_line(
    source: &str,
    target_line: usize,
    include_trailing_empty_line: bool,
) -> Option<(usize, usize, &str)> {
    // Zero-based position of the wanted line; line 0 does not exist.
    let wanted = target_line.checked_sub(1)?;
    let mut cursor = 0usize;
    let mut lines_seen = 0usize;
    for chunk in source.split_inclusive('\n') {
        if lines_seen == wanted {
            let text = chunk.strip_suffix('\n').unwrap_or(chunk);
            return Some((cursor, cursor + text.len(), text));
        }
        cursor += chunk.len();
        lines_seen += 1;
    }
    // Only the line immediately after a final newline can still match.
    let is_trailing_line = lines_seen == wanted && source.ends_with('\n');
    (include_trailing_empty_line && is_trailing_line).then(|| (source.len(), source.len(), ""))
}
613
/// Rounds `offset` down to the nearest character boundary of `source`.
///
/// Offsets beyond the end of `source` round down to `source.len()`. Offset
/// `0` is always a boundary, so the search cannot fail.
fn floor_char_boundary(source: &str, offset: usize) -> usize {
    (0..=offset)
        .rev()
        .find(|&candidate| source.is_char_boundary(candidate))
        .unwrap_or(0)
}
620
621impl fmt::Display for Error {
622    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
623        match self.location() {
624            Some(location) => write!(
625                f,
626                "{} at line {}, column {}",
627                self.diagnostic.message,
628                location.line(),
629                location.column()
630            ),
631            None => f.write_str(&self.diagnostic.message),
632        }
633    }
634}
635
// The empty body keeps every provided method at its default; this impl
// defines no `source()`, so no underlying cause is exposed through it.
impl std::error::Error for Error {}
637
638impl serde::de::Error for Error {
639    fn custom<T: fmt::Display>(msg: T) -> Self {
640        Self::data(msg.to_string(), Span::default())
641    }
642
643    fn unknown_field(field: &str, expected: &'static [&'static str]) -> Self {
644        let message = if expected.is_empty() {
645            format!("unknown field `{field}`")
646        } else {
647            format!(
648                "unknown field `{}`, expected one of {}",
649                field,
650                expected
651                    .iter()
652                    .map(|field| format!("`{field}`"))
653                    .collect::<Vec<_>>()
654                    .join(", ")
655            )
656        };
657        Self::data(message, Span::default())
658            .with_path_segment_if_empty(ErrorPathSegment::Field(field.to_string()))
659    }
660
661    fn missing_field(field: &'static str) -> Self {
662        Self::data(format!("missing field `{field}`"), Span::default())
663            .with_path_segment_if_empty(ErrorPathSegment::Field(field.to_string()))
664    }
665
666    fn duplicate_field(field: &'static str) -> Self {
667        Self::data(format!("duplicate field `{field}`"), Span::default())
668            .with_path_segment_if_empty(ErrorPathSegment::Field(field.to_string()))
669    }
670}
671
672impl serde::ser::Error for Error {
673    fn custom<T>(msg: T) -> Self
674    where
675        T: fmt::Display,
676    {
677        Self::new(msg.to_string(), Span::default())
678    }
679}