Skip to main content

schemaorg_rs/
error.rs

1//! Error and warning types for structured data extraction.
2//!
3//! This module provides two levels of diagnostics:
4//!
5//! - [`ExtractionError`] -- fatal errors that prevent extraction from completing.
6//!   These are rare; most issues are captured as warnings instead.
7//! - [`ExtractionWarning`] -- non-fatal issues that did not prevent extraction
8//!   but may affect data quality. Each warning carries a machine-readable
9//!   [`WarningCode`] for programmatic handling.
10//!
11//! # Design
12//!
13//! The extraction pipeline is designed to be lenient: individual format
14//! failures (e.g. invalid JSON in a `<script>` tag) are captured as warnings
15//! so that other formats can still produce results. Only truly unrecoverable
16//! errors propagate as [`ExtractionError`].
17//!
18//! # Examples
19//!
20//! ```
21//! use schemaorg_rs::error::{ExtractionWarning, WarningCode};
22//!
23//! let warning = ExtractionWarning {
24//!     message: "empty JSON-LD script tag".into(),
25//!     source_location: None,
26//!     code: WarningCode::MalformedJsonLd,
27//! };
28//!
29//! assert_eq!(warning.code, WarningCode::MalformedJsonLd);
30//! ```
31
32use std::fmt;
33
34use serde::{Deserialize, Serialize};
35
36use crate::types::SourceLocation;
37use thiserror::Error;
38
39/// Fatal extraction errors that prevent further processing.
40///
41/// Most extraction issues are captured as [`ExtractionWarning`]s instead.
42/// This enum is reserved for errors that make it impossible to produce
43/// any meaningful output.
44///
45/// # Examples
46///
47/// ```
48/// use schemaorg_rs::error::ExtractionError;
49///
50/// let err = ExtractionError::Internal("unexpected state".into());
51/// assert_eq!(err.to_string(), "extraction failed: unexpected state");
52/// ```
53#[derive(Debug, Error)]
54#[non_exhaustive]
55pub enum ExtractionError {
56    /// A JSON-LD script body contained invalid JSON.
57    ///
58    /// Note: This variant is available for library consumers who parse
59    /// JSON-LD outside the extractor pipeline. The built-in extractors
60    /// convert JSON parse failures into warnings instead.
61    #[cfg(feature = "extraction")]
62    #[error("invalid JSON in <script type=\"application/ld+json\">")]
63    JsonParse(#[source] serde_json::Error),
64
65    /// An internal error occurred during extraction.
66    #[error("extraction failed: {0}")]
67    Internal(String),
68}
69
70/// A non-fatal warning produced during extraction.
71///
72/// Warnings indicate issues that did not prevent extraction but may
73/// affect data quality (e.g. malformed markup, unresolvable references).
74///
75/// # Examples
76///
77/// ```
78/// use schemaorg_rs::error::{ExtractionWarning, WarningCode};
79///
80/// let warning = ExtractionWarning {
81/// message: "JSON-LD object has no @type".into(),
82/// source_location: None,
83/// code: WarningCode::EmptyType,
84/// };
85///
86/// assert_eq!(warning.code, WarningCode::EmptyType);
87/// ```
88#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
89pub struct ExtractionWarning {
90    /// Human-readable description of the warning.
91    pub message: String,
92    /// Location in the HTML where the issue was found.
93    pub source_location: Option<SourceLocation>,
94    /// Machine-readable warning code.
95    pub code: WarningCode,
96}
97
98/// Machine-readable warning codes.
99///
100/// Each code corresponds to a specific class of extraction issue.
101/// Use these for programmatic filtering and reporting.
102///
103/// # Examples
104///
105/// ```
106/// use schemaorg_rs::error::WarningCode;
107///
108/// let code = WarningCode::MalformedJsonLd;
109/// assert_eq!(code.to_string(), "malformed-json-ld");
110/// ```
111#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
112#[non_exhaustive]
113pub enum WarningCode {
114    /// Invalid or unparseable JSON-LD content.
115    MalformedJsonLd,
116    /// Invalid or incomplete Microdata markup.
117    MalformedMicrodata,
118    /// Invalid or incomplete `RDFa` markup.
119    MalformedRdfa,
120    /// An `@id` reference could not be resolved.
121    UnresolvableReference,
122    /// A structured data node has no `@type` / `itemtype` / `typeof`.
123    EmptyType,
124    /// Multiple nodes share the same `@id`.
125    DuplicateId,
126    /// An entire extractor failed (captured so other formats still run).
127    ExtractorFailed,
128}
129
130impl fmt::Display for WarningCode {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        match self {
133            Self::MalformedJsonLd => write!(f, "malformed-json-ld"),
134            Self::MalformedMicrodata => write!(f, "malformed-microdata"),
135            Self::MalformedRdfa => write!(f, "malformed-rdfa"),
136            Self::UnresolvableReference => write!(f, "unresolvable-reference"),
137            Self::EmptyType => write!(f, "empty-type"),
138            Self::DuplicateId => write!(f, "duplicate-id"),
139            Self::ExtractorFailed => write!(f, "extractor-failed"),
140        }
141    }
142}