schemaorg_rs/error.rs
1//! Error and warning types for structured data extraction.
2//!
3//! This module provides two levels of diagnostics:
4//!
5//! - [`ExtractionError`] -- fatal errors that prevent extraction from completing.
6//! These are rare; most issues are captured as warnings instead.
7//! - [`ExtractionWarning`] -- non-fatal issues that did not prevent extraction
8//! but may affect data quality. Each warning carries a machine-readable
9//! [`WarningCode`] for programmatic handling.
10//!
11//! # Design
12//!
13//! The extraction pipeline is designed to be lenient: individual format
14//! failures (e.g. invalid JSON in a `<script>` tag) are captured as warnings
15//! so that other formats can still produce results. Only truly unrecoverable
16//! errors propagate as [`ExtractionError`].
17//!
18//! # Examples
19//!
20//! ```
21//! use schemaorg_rs::error::{ExtractionWarning, WarningCode};
22//!
23//! let warning = ExtractionWarning {
24//! message: "empty JSON-LD script tag".into(),
25//! source_location: None,
26//! code: WarningCode::MalformedJsonLd,
27//! };
28//!
29//! assert_eq!(warning.code, WarningCode::MalformedJsonLd);
30//! ```
31
32use std::fmt;
33
34use serde::{Deserialize, Serialize};
35
36use crate::types::SourceLocation;
37use thiserror::Error;
38
/// Fatal extraction errors that prevent further processing.
///
/// Most extraction issues are captured as [`ExtractionWarning`]s instead.
/// This enum is reserved for errors that make it impossible to produce
/// any meaningful output.
///
/// The enum is marked `#[non_exhaustive]`, so `match` expressions written in
/// other crates need a wildcard arm; additional variants may be introduced
/// without that being a breaking change.
///
/// # Examples
///
/// ```
/// use schemaorg_rs::error::ExtractionError;
///
/// let err = ExtractionError::Internal("unexpected state".into());
/// assert_eq!(err.to_string(), "extraction failed: unexpected state");
/// ```
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ExtractionError {
    /// A JSON-LD script body contained invalid JSON.
    ///
    /// This variant only exists when the `extraction` feature is enabled.
    ///
    /// Its `Display` text is a fixed message that does not embed the parser's
    /// own description. The wrapped [`serde_json::Error`] is tagged as the
    /// error source, so callers that want the parse details should walk the
    /// source chain via [`std::error::Error::source`].
    ///
    /// Note: This variant is available for library consumers who parse
    /// JSON-LD outside the extractor pipeline. The built-in extractors
    /// convert JSON parse failures into warnings instead.
    #[cfg(feature = "extraction")]
    #[error("invalid JSON in <script type=\"application/ld+json\">")]
    JsonParse(#[source] serde_json::Error),

    /// An internal error occurred during extraction.
    ///
    /// The payload is a free-form message; it is rendered as
    /// `extraction failed: <message>`.
    #[error("extraction failed: {0}")]
    Internal(String),
}
69
/// A non-fatal warning produced during extraction.
///
/// Warnings indicate issues that did not prevent extraction but may
/// affect data quality (e.g. malformed markup, unresolvable references).
///
/// All fields are public, so a warning is built with a plain struct literal.
/// The type derives `Clone`, `PartialEq`/`Eq` and serde's `Serialize` /
/// `Deserialize`, so warnings can be compared and serialized.
///
/// # Examples
///
/// ```
/// use schemaorg_rs::error::{ExtractionWarning, WarningCode};
///
/// let warning = ExtractionWarning {
///     message: "JSON-LD object has no @type".into(),
///     source_location: None,
///     code: WarningCode::EmptyType,
/// };
///
/// assert_eq!(warning.code, WarningCode::EmptyType);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExtractionWarning {
    /// Human-readable description of the warning.
    ///
    /// Intended for people; use [`ExtractionWarning::code`] for programmatic
    /// handling.
    pub message: String,
    /// Location in the HTML where the issue was found.
    ///
    /// `None` when no location is attached to the warning.
    pub source_location: Option<SourceLocation>,
    /// Machine-readable warning code.
    pub code: WarningCode,
}
97
/// Machine-readable warning codes.
///
/// Each code corresponds to a specific class of extraction issue.
/// Use these for programmatic filtering and reporting.
///
/// The `Display` implementation renders each code as a kebab-case string
/// (listed per variant below). The serde derives carry no rename attribute
/// here, so the serialized form follows serde's defaults rather than the
/// kebab-case `Display` text -- do not assume the two match.
///
/// The enum is `#[non_exhaustive]`: `match` expressions in other crates need
/// a wildcard arm, since further codes may be added.
///
/// # Examples
///
/// ```
/// use schemaorg_rs::error::WarningCode;
///
/// let code = WarningCode::MalformedJsonLd;
/// assert_eq!(code.to_string(), "malformed-json-ld");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum WarningCode {
    /// Invalid or unparseable JSON-LD content.
    ///
    /// Displayed as `malformed-json-ld`.
    MalformedJsonLd,
    /// Invalid or incomplete Microdata markup.
    ///
    /// Displayed as `malformed-microdata`.
    MalformedMicrodata,
    /// Invalid or incomplete `RDFa` markup.
    ///
    /// Displayed as `malformed-rdfa`.
    MalformedRdfa,
    /// An `@id` reference could not be resolved.
    ///
    /// Displayed as `unresolvable-reference`.
    UnresolvableReference,
    /// A structured data node has no `@type` / `itemtype` / `typeof`.
    ///
    /// Displayed as `empty-type`.
    EmptyType,
    /// Multiple nodes share the same `@id`.
    ///
    /// Displayed as `duplicate-id`.
    DuplicateId,
    /// An entire extractor failed (captured so other formats still run).
    ///
    /// Displayed as `extractor-failed`.
    ExtractorFailed,
}
129
130impl fmt::Display for WarningCode {
131 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132 match self {
133 Self::MalformedJsonLd => write!(f, "malformed-json-ld"),
134 Self::MalformedMicrodata => write!(f, "malformed-microdata"),
135 Self::MalformedRdfa => write!(f, "malformed-rdfa"),
136 Self::UnresolvableReference => write!(f, "unresolvable-reference"),
137 Self::EmptyType => write!(f, "empty-type"),
138 Self::DuplicateId => write!(f, "duplicate-id"),
139 Self::ExtractorFailed => write!(f, "extractor-failed"),
140 }
141 }
142}