Skip to main content

xdoc/schema/
mod.rs

1//! Structural XML contracts and validation.
2//!
3//! This module implements a small, engine-native contract layer. It is not an
4//! XSD implementation; the types are intentionally shaped so an XSD adapter can
5//! be added later without making `schema` depend on any XML domain.
6
7use crate::core::{Document, ErrorKind, NodeId, NodeKind, XmlError, XmlResult};
8use crate::query::{NamespaceContext, Query, QueryValue};
9
/// Boxed user-supplied validation callback.
///
/// The callback receives the [`Document`] being validated and returns the
/// issues it found. Returning `Err` makes [`XmlContract::validate`] stop and
/// propagate that error instead of producing a report.
pub type CustomRule = Box<dyn Fn(&Document) -> XmlResult<Vec<ValidationIssue>>>;
11
12pub struct XmlContract {
13    name: String,
14    namespaces: NamespaceContext,
15    rules: Vec<ContractRule>,
16    custom_rules: Vec<CustomRule>,
17}
18
19impl XmlContract {
20    pub fn new(name: impl Into<String>) -> Self {
21        Self {
22            name: name.into(),
23            namespaces: NamespaceContext::new(),
24            rules: Vec::new(),
25            custom_rules: Vec::new(),
26        }
27    }
28
29    pub fn name(&self) -> &str {
30        &self.name
31    }
32
33    pub fn with_namespace(
34        mut self,
35        alias: impl Into<String>,
36        uri: impl Into<String>,
37    ) -> XmlResult<Self> {
38        self.namespaces = self.namespaces.with_alias(alias, uri)?;
39        Ok(self)
40    }
41
42    pub fn required(mut self, path: impl AsRef<str>) -> XmlResult<Self> {
43        self.rules
44            .push(ContractRule::Required(CompiledPath::new(path)?));
45        Ok(self)
46    }
47
48    pub fn cardinality(
49        mut self,
50        path: impl AsRef<str>,
51        min: usize,
52        max: Option<usize>,
53    ) -> XmlResult<Self> {
54        if max.is_some_and(|max| min > max) {
55            return Err(schema_error(format!(
56                "invalid cardinality for `{}`: min cannot be greater than max",
57                path.as_ref()
58            )));
59        }
60
61        self.rules.push(ContractRule::Cardinality {
62            path: CompiledPath::new(path)?,
63            min,
64            max,
65        });
66        Ok(self)
67    }
68
69    pub fn text_type(mut self, path: impl AsRef<str>, value_type: ValueType) -> XmlResult<Self> {
70        self.rules.push(ContractRule::TextType {
71            path: CompiledPath::new(path)?,
72            value_type,
73        });
74        Ok(self)
75    }
76
77    pub fn enum_value(
78        mut self,
79        path: impl AsRef<str>,
80        values: impl IntoIterator<Item = impl Into<String>>,
81    ) -> XmlResult<Self> {
82        let values = values.into_iter().map(Into::into).collect::<Vec<_>>();
83        if values.is_empty() {
84            return Err(schema_error(format!(
85                "enum rule for `{}` requires at least one value",
86                path.as_ref()
87            )));
88        }
89
90        self.rules.push(ContractRule::EnumValue {
91            path: CompiledPath::new(path)?,
92            values,
93        });
94        Ok(self)
95    }
96
97    pub fn rule(
98        mut self,
99        rule: impl Fn(&Document) -> XmlResult<Vec<ValidationIssue>> + 'static,
100    ) -> Self {
101        self.custom_rules.push(Box::new(rule));
102        self
103    }
104
105    pub fn validate(&self, document: &Document) -> XmlResult<ValidationReport> {
106        let mut report = ValidationReport::new(self.name.clone());
107
108        for rule in &self.rules {
109            rule.validate(document, &self.namespaces, &mut report)?;
110        }
111
112        for rule in &self.custom_rules {
113            for issue in rule(document)? {
114                report.push(issue);
115            }
116        }
117
118        Ok(report)
119    }
120}
121
/// Lexical type that a matched text value is expected to satisfy.
///
/// Values are trimmed of surrounding whitespace before being checked.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValueType {
    /// Any text; always accepted.
    String,
    /// Whole-number text, checked by parsing as `i64`.
    Integer,
    /// Numeric text, checked by parsing as `f64`.
    Decimal,
    /// Exactly one of `true`, `false`, `1` or `0`.
    Boolean,
}
129
130#[derive(Debug, Clone, PartialEq, Eq)]
131pub struct ValidationReport {
132    contract_name: String,
133    issues: Vec<ValidationIssue>,
134}
135
136impl ValidationReport {
137    pub fn new(contract_name: impl Into<String>) -> Self {
138        Self {
139            contract_name: contract_name.into(),
140            issues: Vec::new(),
141        }
142    }
143
144    pub fn contract_name(&self) -> &str {
145        &self.contract_name
146    }
147
148    pub fn is_valid(&self) -> bool {
149        !self
150            .issues
151            .iter()
152            .any(|issue| issue.severity == ValidationSeverity::Error)
153    }
154
155    pub fn issues(&self) -> &[ValidationIssue] {
156        &self.issues
157    }
158
159    pub fn errors(&self) -> impl Iterator<Item = &ValidationIssue> {
160        self.issues
161            .iter()
162            .filter(|issue| issue.severity == ValidationSeverity::Error)
163    }
164
165    pub fn warnings(&self) -> impl Iterator<Item = &ValidationIssue> {
166        self.issues
167            .iter()
168            .filter(|issue| issue.severity == ValidationSeverity::Warning)
169    }
170
171    pub fn push(&mut self, issue: ValidationIssue) {
172        self.issues.push(issue);
173    }
174}
175
176#[derive(Debug, Clone, PartialEq, Eq)]
177pub struct ValidationIssue {
178    severity: ValidationSeverity,
179    path: String,
180    message: String,
181}
182
183impl ValidationIssue {
184    pub fn error(path: impl Into<String>, message: impl Into<String>) -> Self {
185        Self {
186            severity: ValidationSeverity::Error,
187            path: path.into(),
188            message: message.into(),
189        }
190    }
191
192    pub fn warning(path: impl Into<String>, message: impl Into<String>) -> Self {
193        Self {
194            severity: ValidationSeverity::Warning,
195            path: path.into(),
196            message: message.into(),
197        }
198    }
199
200    pub fn severity(&self) -> &ValidationSeverity {
201        &self.severity
202    }
203
204    pub fn path(&self) -> &str {
205        &self.path
206    }
207
208    pub fn message(&self) -> &str {
209        &self.message
210    }
211}
212
/// Alternative name for [`ValidationIssue`]; the two are the same type.
pub type ValidationError = ValidationIssue;
214
/// How serious a [`ValidationIssue`] is.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// Makes [`ValidationReport::is_valid`] return `false`.
    Error,
    /// Recorded in the report but does not affect
    /// [`ValidationReport::is_valid`].
    Warning,
}
220
/// Extension point for turning an external schema description into an
/// [`XmlContract`].
///
/// No implementation is provided in this module.
pub trait XsdContractAdapter {
    /// Name of the contract the adapter describes.
    // NOTE(review): presumably expected to equal the `name()` of the contract
    // returned by `into_contract` — nothing here enforces that; confirm.
    fn contract_name(&self) -> &str;
    /// Consumes the adapter and builds the contract, or reports why it
    /// could not be built.
    fn into_contract(self) -> XmlResult<XmlContract>;
}
225
226#[derive(Debug, Clone, PartialEq, Eq)]
227enum ContractRule {
228    Required(CompiledPath),
229    Cardinality {
230        path: CompiledPath,
231        min: usize,
232        max: Option<usize>,
233    },
234    TextType {
235        path: CompiledPath,
236        value_type: ValueType,
237    },
238    EnumValue {
239        path: CompiledPath,
240        values: Vec<String>,
241    },
242}
243
244impl ContractRule {
245    fn validate(
246        &self,
247        document: &Document,
248        namespaces: &NamespaceContext,
249        report: &mut ValidationReport,
250    ) -> XmlResult<()> {
251        match self {
252            Self::Required(path) => {
253                let result = path.query.evaluate_with_context(document, namespaces)?;
254                if result.is_empty() {
255                    report.push(ValidationIssue::error(
256                        path.source(),
257                        format!("required path `{}` was not found", path.source()),
258                    ));
259                }
260            }
261            Self::Cardinality { path, min, max } => {
262                let count = path
263                    .query
264                    .evaluate_with_context(document, namespaces)?
265                    .len();
266                if count < *min {
267                    report.push(ValidationIssue::error(
268                        path.source(),
269                        format!(
270                            "path `{}` expected at least {} match(es), found {}",
271                            path.source(),
272                            min,
273                            count
274                        ),
275                    ));
276                }
277                if let Some(max) = max {
278                    if count > *max {
279                        report.push(ValidationIssue::error(
280                            path.source(),
281                            format!(
282                                "path `{}` expected at most {} match(es), found {}",
283                                path.source(),
284                                max,
285                                count
286                            ),
287                        ));
288                    }
289                }
290            }
291            Self::TextType { path, value_type } => {
292                for value in text_values(document, path, namespaces)? {
293                    if !value_type.matches(&value) {
294                        report.push(ValidationIssue::error(
295                            path.source(),
296                            format!(
297                                "value `{}` at `{}` is not a valid {:?}",
298                                value,
299                                path.source(),
300                                value_type
301                            ),
302                        ));
303                    }
304                }
305            }
306            Self::EnumValue { path, values } => {
307                for value in text_values(document, path, namespaces)? {
308                    if !values.iter().any(|allowed| allowed == &value) {
309                        report.push(ValidationIssue::error(
310                            path.source(),
311                            format!(
312                                "value `{}` at `{}` is not one of [{}]",
313                                value,
314                                path.source(),
315                                values.join(", ")
316                            ),
317                        ));
318                    }
319                }
320            }
321        }
322
323        Ok(())
324    }
325}
326
327impl ValueType {
328    fn matches(&self, value: &str) -> bool {
329        let value = value.trim();
330        match self {
331            Self::String => true,
332            Self::Integer => value.parse::<i64>().is_ok(),
333            Self::Decimal => value.parse::<f64>().is_ok(),
334            Self::Boolean => matches!(value, "true" | "false" | "1" | "0"),
335        }
336    }
337}
338
339#[derive(Debug, Clone, PartialEq, Eq)]
340struct CompiledPath {
341    source: String,
342    query: Query,
343}
344
345impl CompiledPath {
346    fn new(path: impl AsRef<str>) -> XmlResult<Self> {
347        let source = path.as_ref().to_owned();
348        Ok(Self {
349            query: Query::parse(&source)?,
350            source,
351        })
352    }
353
354    fn source(&self) -> &str {
355        &self.source
356    }
357}
358
359fn text_values(
360    document: &Document,
361    path: &CompiledPath,
362    namespaces: &NamespaceContext,
363) -> XmlResult<Vec<String>> {
364    let result = path.query.evaluate_with_context(document, namespaces)?;
365    let mut values = Vec::new();
366
367    for value in result.values() {
368        match value {
369            QueryValue::Text(value) | QueryValue::Attribute { value, .. } => {
370                values.push(value.clone());
371            }
372            QueryValue::Node(id) => values.push(direct_text(document, *id)?),
373        }
374    }
375
376    Ok(values)
377}
378
379fn direct_text(document: &Document, node_id: NodeId) -> XmlResult<String> {
380    let mut value = String::new();
381    let node = document.node(node_id)?;
382    match node.kind() {
383        NodeKind::Text(text) | NodeKind::CData(text) => value.push_str(text),
384        NodeKind::Element(element) => {
385            for child in element.children() {
386                match document.node(*child)?.kind() {
387                    NodeKind::Text(text) | NodeKind::CData(text) => value.push_str(text),
388                    _ => {}
389                }
390            }
391        }
392        NodeKind::Comment(_) | NodeKind::ProcessingInstruction { .. } => {}
393    }
394    Ok(value)
395}
396
/// Builds an [`XmlError`] of kind [`ErrorKind::Validation`] carrying `message`.
///
/// Used for contract-construction mistakes such as contradictory cardinality
/// bounds or an empty enum value list.
fn schema_error(message: impl Into<String>) -> XmlError {
    XmlError::new(ErrorKind::Validation, message)
}
400
// Unit tests for contract construction and validation. Documents are built
// with the crate's parser.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser;
    use crate::query::DocumentQueryExt;

    /// Fixture: a document with a header, two lines and an approval flag,
    /// shaped to satisfy every rule used in the happy-path test.
    fn valid_document() -> XmlResult<Document> {
        parser::parse_str(
            r#"<Root>
                <Header>
                    <ID>DOC-1</ID>
                    <Status>draft</Status>
                </Header>
                <Lines>
                    <Line code="A1"><Quantity>2</Quantity><Amount>10.50</Amount></Line>
                    <Line code="B2"><Quantity>4</Quantity><Amount>20.00</Amount></Line>
                </Lines>
                <Approved>true</Approved>
            </Root>"#,
        )
    }

    /// One rule of every built-in kind passes on the fixture and the report
    /// carries no issues at all.
    #[test]
    fn schema_contract_validates_correct_document() -> XmlResult<()> {
        let document = valid_document()?;
        let contract = XmlContract::new("Example")
            .required("/Root/Header/ID")?
            .cardinality("/Root/Lines/Line", 1, Some(3))?
            .text_type("/Root/Lines/Line/Quantity", ValueType::Integer)?
            .text_type("/Root/Lines/Line/Amount", ValueType::Decimal)?
            .text_type("/Root/Approved", ValueType::Boolean)?
            .enum_value("/Root/Header/Status", ["draft", "final"])?;

        let report = contract.validate(&document)?;

        assert!(report.is_valid());
        assert_eq!(report.contract_name(), "Example");
        assert!(report.issues().is_empty());
        Ok(())
    }

    /// A required path with no match yields an error keyed by that path.
    #[test]
    fn schema_required_reports_missing_path() -> XmlResult<()> {
        let document = valid_document()?;
        let contract = XmlContract::new("Example").required("/Root/Header/Missing")?;

        let report = contract.validate(&document)?;

        assert!(!report.is_valid());
        let error = report.errors().next().expect("required error");
        assert_eq!(error.path(), "/Root/Header/Missing");
        assert!(error.message().contains("required path"));
        Ok(())
    }

    /// The fixture has two `Line` elements, so a minimum of 3 and a maximum
    /// of 1 each produce exactly one error.
    #[test]
    fn schema_cardinality_reports_minimum_and_maximum() -> XmlResult<()> {
        let document = valid_document()?;
        let contract = XmlContract::new("Example")
            .cardinality("/Root/Lines/Line", 3, None)?
            .cardinality("/Root/Lines/Line", 0, Some(1))?;

        let report = contract.validate(&document)?;
        let messages = report
            .errors()
            .map(ValidationIssue::message)
            .collect::<Vec<_>>();

        assert_eq!(messages.len(), 2);
        assert!(messages
            .iter()
            .any(|message| message.contains("at least 3")));
        assert!(messages.iter().any(|message| message.contains("at most 1")));
        Ok(())
    }

    /// `abc` fails the `Integer` check but passes `String`, so only one
    /// error is reported.
    #[test]
    fn schema_types_report_invalid_values() -> XmlResult<()> {
        let document = parser::parse_str("<Root><Quantity>abc</Quantity></Root>")?;
        let contract = XmlContract::new("Example")
            .text_type("/Root/Quantity", ValueType::Integer)?
            .text_type("/Root/Quantity", ValueType::String)?;

        let report = contract.validate(&document)?;

        assert!(!report.is_valid());
        assert_eq!(report.errors().count(), 1);
        assert!(report.issues()[0].message().contains("Integer"));
        Ok(())
    }

    /// A value outside the allowed list is reported.
    #[test]
    fn schema_enum_reports_invalid_values() -> XmlResult<()> {
        let document = parser::parse_str("<Root><Status>archived</Status></Root>")?;
        let contract =
            XmlContract::new("Example").enum_value("/Root/Status", ["draft", "final"])?;

        let report = contract.validate(&document)?;

        assert!(!report.is_valid());
        assert!(report.issues()[0].message().contains("not one of"));
        Ok(())
    }

    /// A custom rule runs during validation; because the fixture contains
    /// the ID it takes the warning branch, which leaves the report valid.
    #[test]
    fn schema_custom_rule_can_return_error_with_path() -> XmlResult<()> {
        let document = valid_document()?;
        let contract = XmlContract::new("Example").rule(|document| {
            if document.query("/Root/Header/ID")?.is_empty() {
                Ok(vec![ValidationIssue::error(
                    "/Root/Header/ID",
                    "ID must be present",
                )])
            } else {
                Ok(vec![ValidationIssue::warning(
                    "/Root/Header/ID",
                    "custom rule was evaluated",
                )])
            }
        });

        let report = contract.validate(&document)?;

        assert!(report.is_valid());
        let warning = report.warnings().next().expect("custom warning");
        assert_eq!(warning.path(), "/Root/Header/ID");
        assert_eq!(warning.severity(), &ValidationSeverity::Warning);
        Ok(())
    }

    /// The contract's alias `d` differs from the document's prefix `doc`
    /// but maps to the same URI, and the required path still matches.
    #[test]
    fn schema_namespaces_use_query_context() -> XmlResult<()> {
        let document = parser::parse_str(
            r#"<doc:Root xmlns:doc="urn:doc"><doc:ID>DOC-1</doc:ID></doc:Root>"#,
        )?;
        let contract = XmlContract::new("Namespaced")
            .with_namespace("d", "urn:doc")?
            .required("/d:Root/d:ID")?;

        let report = contract.validate(&document)?;

        assert!(report.is_valid());
        Ok(())
    }

    /// `min > max` is rejected when the rule is added, not at validation
    /// time.
    #[test]
    fn schema_invalid_cardinality_is_validation_error() {
        let error = match XmlContract::new("Example").cardinality("/Root/Line", 2, Some(1)) {
            Ok(_) => panic!("invalid cardinality must fail"),
            Err(error) => error,
        };

        assert_eq!(error.kind(), &ErrorKind::Validation);
        assert!(error.message().contains("min cannot be greater"));
    }
}