schemaorg_rs/validation/
mod.rs1pub mod diagnostics;
46mod property_checker;
47mod type_checker;
48mod value_checker;
49
50pub use diagnostics::{DiagnosticCode, Severity, ValidationDiagnostic};
51
52use crate::graph::StructuredDataGraph;
53use crate::types::{SchemaNode, SchemaValue};
54use crate::vocabulary;
55
56#[derive(Debug, Clone, Default)]
61#[must_use]
62pub struct ValidationResult {
63 pub diagnostics: Vec<ValidationDiagnostic>,
65}
66
67impl ValidationResult {
68 pub fn errors(&self) -> impl Iterator<Item = &ValidationDiagnostic> {
70 self.diagnostics
71 .iter()
72 .filter(|d| d.severity == Severity::Error)
73 }
74
75 pub fn warnings(&self) -> impl Iterator<Item = &ValidationDiagnostic> {
77 self.diagnostics
78 .iter()
79 .filter(|d| d.severity == Severity::Warning)
80 }
81
82 pub fn infos(&self) -> impl Iterator<Item = &ValidationDiagnostic> {
84 self.diagnostics
85 .iter()
86 .filter(|d| d.severity == Severity::Info)
87 }
88
89 #[must_use]
91 pub fn has_errors(&self) -> bool {
92 self.diagnostics
93 .iter()
94 .any(|d| d.severity == Severity::Error)
95 }
96
97 #[must_use]
99 pub fn has_warnings(&self) -> bool {
100 self.diagnostics
101 .iter()
102 .any(|d| d.severity == Severity::Warning)
103 }
104
105 #[must_use]
107 pub fn len(&self) -> usize {
108 self.diagnostics.len()
109 }
110
111 #[must_use]
113 pub fn is_empty(&self) -> bool {
114 self.diagnostics.is_empty()
115 }
116}
117
118pub fn validate(graph: &StructuredDataGraph) -> ValidationResult {
145 let mut diagnostics = Vec::new();
146 for node in &graph.nodes {
147 let type_label = if node.types.is_empty() {
148 "(unknown)".to_string()
149 } else {
150 node.types.join(", ")
151 };
152 validate_node(node, &type_label, &mut diagnostics);
153 }
154 ValidationResult { diagnostics }
155}
156
157fn validate_node(node: &SchemaNode, path: &str, diagnostics: &mut Vec<ValidationDiagnostic>) {
159 for type_name in &node.types {
161 type_checker::check_type(type_name, path, diagnostics);
162 }
163
164 for (prop_name, values) in &node.properties {
166 let prop_path = format!("{path}.{prop_name}");
167 property_checker::check_property(prop_name, &node.types, &prop_path, diagnostics);
168
169 if let Some(prop_def) = vocabulary::lookup_property(prop_name) {
171 for (i, value) in values.iter().enumerate() {
172 let value_path = if values.len() > 1 {
173 format!("{prop_path}[{i}]")
174 } else {
175 prop_path.clone()
176 };
177
178 value_checker::check_value(
179 value,
180 prop_name,
181 prop_def.expected_types,
182 &value_path,
183 diagnostics,
184 );
185
186 if let SchemaValue::Node(nested) = value {
188 let nested_type_label = if nested.types.is_empty() {
189 format!("{value_path}.(unknown)")
190 } else {
191 format!("{value_path}.{}", nested.types.join(", "))
192 };
193 validate_node(nested, &nested_type_label, diagnostics);
194 }
195 }
196 }
197 }
198}
199
/// Largest edit distance at which `suggest_similar` still offers a candidate
/// as a suggestion.
const MAX_DISTANCE: usize = 3;

/// Largest difference in `str::len` (byte length) between the input and a
/// candidate before `suggest_similar` skips the candidate without computing
/// its edit distance.
const MAX_LENGTH_DIFF: usize = 3;
206
207pub(crate) fn suggest_similar<'a>(input: &str, candidates: &'a [&str]) -> Option<&'a str> {
211 let input_len = input.len();
212
213 candidates
214 .iter()
215 .filter(|c| {
216 let len_diff = if c.len() > input_len {
217 c.len() - input_len
218 } else {
219 input_len - c.len()
220 };
221 len_diff <= MAX_LENGTH_DIFF
222 })
223 .map(|c| (*c, levenshtein(input, c)))
224 .filter(|(_, d)| *d <= MAX_DISTANCE && *d > 0) .min_by_key(|(_, d)| *d)
226 .map(|(c, _)| c)
227}
228
/// Computes the Levenshtein edit distance between `a` and `b`, ignoring
/// ASCII case.
///
/// The distance is counted in Unicode scalar values (`char`s), not UTF-8
/// bytes, so replacing one non-ASCII character costs a single edit. Two
/// characters are treated as equal when they match under
/// `char::eq_ignore_ascii_case`; case folding outside ASCII is not applied.
///
/// Only one row of the dynamic-programming table is kept, sized by the
/// shorter of the two inputs.
fn levenshtein(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    if a_chars.is_empty() {
        return b_chars.len();
    }
    if b_chars.is_empty() {
        return a_chars.len();
    }

    // Walk the longer input in the outer loop so the row buffer is only as
    // long as the shorter input.
    let (outer, inner) = if a_chars.len() < b_chars.len() {
        (&b_chars, &a_chars)
    } else {
        (&a_chars, &b_chars)
    };

    let inner_len = inner.len();
    let mut row: Vec<usize> = (0..=inner_len).collect();

    for (i, outer_ch) in outer.iter().enumerate() {
        // `diagonal` holds the previous row's value one column to the left.
        let mut diagonal = i;
        row[0] = i + 1;

        for (j, inner_ch) in inner.iter().enumerate() {
            let cost = usize::from(!outer_ch.eq_ignore_ascii_case(inner_ch));
            let above = row[j + 1];
            // Deletion, insertion, or substitution/match — whichever is cheapest.
            row[j + 1] = (above + 1).min(row[j] + 1).min(diagonal + cost);
            diagonal = above;
        }
    }

    row[inner_len]
}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers empty inputs, a classic example, single edits, and inputs that
    /// differ only in ASCII case.
    #[test]
    fn levenshtein_basic() {
        let cases: [(&str, &str, usize); 7] = [
            ("", "", 0),
            ("a", "", 1),
            ("", "a", 1),
            ("kitten", "sitting", 3),
            ("Product", "Produc", 1),
            ("Product", "product", 0),
            ("name", "namee", 1),
        ];

        for &(left, right, expected) in &cases {
            assert_eq!(levenshtein(left, right), expected);
        }
    }

    /// A near-miss spelling resolves to the intended candidate.
    #[test]
    fn suggest_similar_finds_close_match() {
        let candidates = &["Product", "Person", "Place", "Event"];
        let cases: [(&str, &str); 3] = [
            ("Produc", "Product"),
            ("Prduct", "Product"),
            ("Perso", "Person"),
        ];

        for &(typo, expected) in &cases {
            assert_eq!(suggest_similar(typo, candidates), Some(expected));
        }
    }

    /// An input far from every candidate yields no suggestion.
    #[test]
    fn suggest_similar_none_for_distant() {
        let candidates = &["Product", "Person", "Place"];
        let suggestion = suggest_similar("XYZ123", candidates);
        assert_eq!(suggestion, None);
    }

    /// An exact match is not offered as a suggestion.
    #[test]
    fn suggest_similar_none_for_exact() {
        let candidates = &["Product", "Person"];
        let suggestion = suggest_similar("Product", candidates);
        assert_eq!(suggestion, None);
    }
}