Skip to main content

mx20022_codegen/xsd/
parser.rs

1//! XSD parser built on [`quick_xml`] streaming events.
2//!
3//! The parser drives a simple recursive-descent state machine over the flat
4//! event stream produced by [`quick_xml::Reader`].  No DOM tree is built; each
5//! recognised element is accumulated directly into the [`Schema`] being
6//! constructed.
7//!
8//! # Supported constructs
9//!
10//! | XSD construct | Rust mapping |
11//! |---|---|
12//! | `<xs:element name="…" type="…"/>` (top-level) | [`Element`] |
13//! | `<xs:simpleType>` + `<xs:restriction>` | [`SimpleType`] / [`Restriction`] |
14//! | Facets: `enumeration`, `pattern`, `minLength`, `maxLength`, `minInclusive`, `maxInclusive`, `totalDigits`, `fractionDigits` | [`Facet`] variants |
15//! | `<xs:complexType>` + `<xs:sequence>` | `ComplexContent::Sequence` |
16//! | `<xs:complexType>` + `<xs:choice>` | `ComplexContent::Choice` |
17//! | `<xs:complexType>` + `<xs:simpleContent><xs:extension>` | `ComplexContent::SimpleContent` |
18//! | `<xs:complexType>` + `<xs:any>` | `ComplexContent::Any` |
19
20use std::io::BufRead;
21
22use quick_xml::{
23    events::{BytesStart, Event},
24    Reader,
25};
26
27use super::types::{
28    Attribute, ChoiceVariant, ComplexContent, ComplexType, Element, Facet, MaxOccurs, Restriction,
29    Schema, SequenceElement, SimpleType,
30};
31
/// Errors that can occur while parsing an XSD file.
///
/// This is the single error type returned by [`parse`] and [`parse_str`].
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
    /// An I/O or encoding error from quick-xml.
    ///
    /// Produced when reading the next XML event fails.
    #[error("xml error: {0}")]
    Xml(#[from] quick_xml::Error),

    /// A required XML attribute was missing from an element.
    ///
    /// Raised for a top-level `<xs:simpleType>` or `<xs:complexType>` that has
    /// no `name` attribute.
    #[error("missing required attribute '{attr}' on <{element}>")]
    MissingAttribute {
        /// The XSD element name as passed by the caller (e.g. `xs:simpleType`).
        element: &'static str,
        /// The attribute name that was expected.
        attr: &'static str,
    },

    /// An attribute value could not be parsed as the expected type.
    ///
    /// Raised by the numeric helpers for facets and occurrence bounds.
    #[error("invalid value '{value}' for attribute '{attr}': {reason}")]
    InvalidAttributeValue {
        /// The raw attribute value.
        value: String,
        /// The name reported alongside the value (facet or attribute name).
        attr: &'static str,
        /// Human-readable explanation.
        reason: &'static str,
    },

    /// The `<xs:schema>` root element was not found.
    ///
    /// Returned at end of input when no element with local name `schema` was
    /// encountered at the root level.
    #[error("missing <xs:schema> root element")]
    MissingSchemaRoot,

    /// UTF-8 decoding failure for an attribute value.
    #[error("utf-8 error in attribute value: {0}")]
    Utf8(#[from] std::str::Utf8Error),
}
67
68// ---------------------------------------------------------------------------
69// Internal state machine types
70// ---------------------------------------------------------------------------
71
/// High-level parser context — what the parser is currently building.
///
/// Contexts live on a stack; the event handlers dispatch on the top entry.
/// An empty stack means no `<xs:schema>` context is active.
#[derive(Debug)]
enum Context {
    /// At the top level of `<xs:schema>`.  Carries no depth counter.
    Schema,
    /// Inside a `<xs:simpleType>`; holds the partially built type.
    SimpleType(SimpleTypeBuilder),
    /// Inside a `<xs:complexType>`; holds the partially built type.
    ComplexType(ComplexTypeBuilder),
}
82
/// Builder state for a `<xs:simpleType>` definition.
///
/// `nesting_depth` is the number of child elements that are currently open
/// inside the `<xs:simpleType>`: it is incremented on every Start event and
/// decremented on every End event seen while this builder is on top of the
/// context stack.  An End event arriving while the counter is 0 is therefore
/// the `</xs:simpleType>` tag itself.
#[derive(Debug, Default)]
struct SimpleTypeBuilder {
    /// Value of the `name` attribute of the `<xs:simpleType>`.
    name: String,
    /// Value of the `base` attribute of the `<xs:restriction>` (empty if absent).
    base: String,
    /// Facets collected so far, in document order.
    facets: Vec<Facet>,
    /// Number of open child Start tags inside this type context.
    nesting_depth: u32,
}
96
/// Builder state for a `<xs:complexType>` definition.
///
/// Same depth-tracking convention as [`SimpleTypeBuilder`]: the counter goes
/// up on Start events, down on End events, and an End event at depth 0 closes
/// the type.
#[derive(Debug, Default)]
struct ComplexTypeBuilder {
    /// Value of the `name` attribute of the `<xs:complexType>`.
    name: String,
    /// Content model assembled so far; starts out as `Empty`.
    content: ComplexContentBuilder,
    /// Number of open child Start tags inside this type context.
    nesting_depth: u32,
}
107
/// The content model being assembled inside a [`ComplexTypeBuilder`].
///
/// Converted into the public [`ComplexContent`] once the enclosing
/// `<xs:complexType>` is closed.
#[derive(Debug, Default)]
enum ComplexContentBuilder {
    /// No content-model element has been seen yet.
    #[default]
    Empty,
    /// `<xs:sequence>`: the child elements collected so far.
    Sequence(Vec<SequenceElement>),
    /// `<xs:choice>`: the alternatives collected so far.
    Choice(Vec<ChoiceVariant>),
    /// `<xs:simpleContent>` with an `<xs:extension>` child.
    SimpleContent {
        /// `base` attribute of the `<xs:extension>` (empty until it is seen).
        base: String,
        /// Attributes declared inside the extension.
        attributes: Vec<Attribute>,
        /// True once the `<xs:extension>` open-tag has been seen.
        in_extension: bool,
    },
    /// `<xs:any>` wildcard content.
    Any {
        /// Value of the `namespace` attribute, if present.
        namespace: Option<String>,
    },
}
125
126// ---------------------------------------------------------------------------
127// Public entry points
128// ---------------------------------------------------------------------------
129
130/// Parse an XSD schema from any [`BufRead`] source.
131///
132/// # Errors
133///
134/// Returns [`ParseError`] if the XML is malformed or a required XSD attribute
135/// is missing.
136///
137/// # Example
138///
139/// ```no_run
140/// use std::{fs::File, io::BufReader};
141/// use mx20022_codegen::xsd::parser::parse;
142///
143/// let file = File::open("schemas/head/head.001.001.04.xsd").unwrap();
144/// let schema = parse(BufReader::new(file)).unwrap();
145/// println!("namespace: {}", schema.target_namespace);
146/// ```
147pub fn parse<R: BufRead>(reader: R) -> Result<Schema, ParseError> {
148    parse_internal(Reader::from_reader(reader))
149}
150
151/// Parse an XSD schema from a string slice.
152///
153/// Primarily useful in tests.
154///
155/// # Errors
156///
157/// Returns [`ParseError`] on malformed XML or missing required attributes.
158pub fn parse_str(xml: &str) -> Result<Schema, ParseError> {
159    parse_internal(Reader::from_str(xml))
160}
161
162// ---------------------------------------------------------------------------
163// Core parser loop
164// ---------------------------------------------------------------------------
165
166fn parse_internal<R: BufRead>(mut reader: Reader<R>) -> Result<Schema, ParseError> {
167    reader.config_mut().trim_text(true);
168
169    let mut schema = Schema {
170        target_namespace: String::new(),
171        elements: Vec::new(),
172        simple_types: Vec::new(),
173        complex_types: Vec::new(),
174    };
175
176    let mut buf = Vec::new();
177    let mut ctx_stack: Vec<Context> = Vec::new();
178    let mut found_schema_root = false;
179
180    loop {
181        match reader.read_event_into(&mut buf)? {
182            Event::Start(ref e) => {
183                on_start(e, &mut ctx_stack, &mut schema, &mut found_schema_root)?;
184            }
185            Event::Empty(ref e) => {
186                on_empty(e, &mut ctx_stack, &mut schema, &mut found_schema_root)?;
187            }
188            Event::End(_) => {
189                on_end(&mut ctx_stack, &mut schema);
190            }
191            Event::Eof => break,
192            _ => {}
193        }
194        buf.clear();
195    }
196
197    if !found_schema_root {
198        return Err(ParseError::MissingSchemaRoot);
199    }
200
201    Ok(schema)
202}
203
204// ---------------------------------------------------------------------------
205// Event: Start (opening tag with children)
206// ---------------------------------------------------------------------------
207
208fn on_start(
209    e: &BytesStart<'_>,
210    ctx_stack: &mut Vec<Context>,
211    schema: &mut Schema,
212    found_schema_root: &mut bool,
213) -> Result<(), ParseError> {
214    let name_binding = e.name();
215    let local = local_name(name_binding.as_ref());
216
217    match ctx_stack.last_mut() {
218        // ---- No context yet (or schema root already open) ----
219        None => {
220            if local == "schema" {
221                *found_schema_root = true;
222                schema.target_namespace = attr_value(e, "targetNamespace")?.unwrap_or_default();
223                ctx_stack.push(Context::Schema);
224            }
225            // Everything else at the root level before <xs:schema> is ignored.
226        }
227
228        // ---- Inside <xs:schema> ----
229        Some(Context::Schema) => match local {
230            "simpleType" => {
231                let name = require_attr(e, "name", "xs:simpleType")?;
232                ctx_stack.push(Context::SimpleType(SimpleTypeBuilder {
233                    name,
234                    ..Default::default()
235                }));
236            }
237            "complexType" => {
238                let name = require_attr(e, "name", "xs:complexType")?;
239                ctx_stack.push(Context::ComplexType(ComplexTypeBuilder {
240                    name,
241                    ..Default::default()
242                }));
243            }
244            "element" => {
245                // Top-level element: `<xs:element name="…" type="…">` (with children — unusual).
246                // We still capture it but then push a dummy skip context so End pops cleanly.
247                on_top_level_element(e, schema)?;
248                // Push Schema context so the End event doesn't disturb us — actually we
249                // need depth tracking. Increment schema "skip" would break things. Instead
250                // we track this via an incremented nesting on the parent. Since Schema doesn't
251                // have depth, we just note: the matching End will hit on_end with Some(Schema)
252                // and we'll do nothing (Schema never pops on End).
253                // Actually — on_end only pops SimpleType/ComplexType contexts.  Schema just
254                // returns.  So we're fine: the End of <xs:element> inside Schema is a no-op.
255            }
256            _ => {
257                // Annotation, import, etc. — ignore.  Their End is also a no-op.
258            }
259        },
260
261        // ---- Inside <xs:simpleType> ----
262        Some(Context::SimpleType(ref mut b)) => {
263            // Every Start inside a simpleType increments nesting depth.
264            b.nesting_depth += 1;
265
266            // Only act on the direct children (nesting_depth == 1).
267            if b.nesting_depth == 1 && local == "restriction" {
268                b.base = attr_value(e, "base")?.unwrap_or_default();
269            }
270            // Facets (minLength etc.) are self-closing (Empty events) so we
271            // don't need Start handlers for them.
272        }
273
274        // ---- Inside <xs:complexType> ----
275        Some(Context::ComplexType(ref mut b)) => {
276            b.nesting_depth += 1;
277
278            // Act on direct children (nesting_depth == 1) to set up the content model,
279            // and on grand-children (nesting_depth == 2) for extension inside simpleContent.
280            match (b.nesting_depth, local) {
281                (1, "sequence") => {
282                    b.content = ComplexContentBuilder::Sequence(Vec::new());
283                }
284                (1, "choice") => {
285                    b.content = ComplexContentBuilder::Choice(Vec::new());
286                }
287                (1, "simpleContent") => {
288                    b.content = ComplexContentBuilder::SimpleContent {
289                        base: String::new(),
290                        attributes: Vec::new(),
291                        in_extension: false,
292                    };
293                }
294                (2, "extension") => {
295                    if let ComplexContentBuilder::SimpleContent {
296                        ref mut base,
297                        ref mut in_extension,
298                        ..
299                    } = b.content
300                    {
301                        *base = attr_value(e, "base")?.unwrap_or_default();
302                        *in_extension = true;
303                    }
304                }
305                // Element inside sequence/choice: handled as Empty in ISO 20022 XSDs
306                // but guard for Start+End form too.
307                (2, "element") => {
308                    on_element_inside_complex(e, b)?;
309                }
310                _ => {}
311            }
312        }
313    }
314
315    Ok(())
316}
317
318// ---------------------------------------------------------------------------
319// Event: Empty (self-closing tag)
320// ---------------------------------------------------------------------------
321
322fn on_empty(
323    e: &BytesStart<'_>,
324    ctx_stack: &mut [Context],
325    schema: &mut Schema,
326    found_schema_root: &mut bool,
327) -> Result<(), ParseError> {
328    let name_binding = e.name();
329    let local = local_name(name_binding.as_ref());
330
331    match ctx_stack.last_mut() {
332        None => {
333            if local == "schema" {
334                *found_schema_root = true;
335                schema.target_namespace = attr_value(e, "targetNamespace")?.unwrap_or_default();
336            }
337        }
338
339        Some(Context::Schema) => {
340            if local == "element" {
341                on_top_level_element(e, schema)?;
342            }
343            // simpleType/complexType as empties would have no content, skip them.
344        }
345
346        Some(Context::SimpleType(ref mut b)) => {
347            // Self-closing tags inside a simpleType: facets and bare restriction.
348            match local {
349                "restriction" => {
350                    // <xs:restriction base="xs:boolean"/> with no facets.
351                    b.base = attr_value(e, "base")?.unwrap_or_default();
352                }
353                "enumeration" => {
354                    let v = attr_value(e, "value")?.unwrap_or_default();
355                    b.facets.push(Facet::Enumeration(v));
356                }
357                "pattern" => {
358                    let v = attr_value(e, "value")?.unwrap_or_default();
359                    b.facets.push(Facet::Pattern(v));
360                }
361                "minLength" => {
362                    let v = attr_value(e, "value")?.unwrap_or_default();
363                    b.facets.push(Facet::MinLength(parse_u64(&v, "minLength")?));
364                }
365                "maxLength" => {
366                    let v = attr_value(e, "value")?.unwrap_or_default();
367                    b.facets.push(Facet::MaxLength(parse_u64(&v, "maxLength")?));
368                }
369                "minInclusive" => {
370                    let v = attr_value(e, "value")?.unwrap_or_default();
371                    b.facets.push(Facet::MinInclusive(v));
372                }
373                "maxInclusive" => {
374                    let v = attr_value(e, "value")?.unwrap_or_default();
375                    b.facets.push(Facet::MaxInclusive(v));
376                }
377                "totalDigits" => {
378                    let v = attr_value(e, "value")?.unwrap_or_default();
379                    b.facets
380                        .push(Facet::TotalDigits(parse_u32(&v, "totalDigits")?));
381                }
382                "fractionDigits" => {
383                    let v = attr_value(e, "value")?.unwrap_or_default();
384                    b.facets
385                        .push(Facet::FractionDigits(parse_u32(&v, "fractionDigits")?));
386                }
387                _ => {}
388            }
389        }
390
391        Some(Context::ComplexType(ref mut b)) => match local {
392            "element" => {
393                on_element_inside_complex(e, b)?;
394            }
395            "any" => {
396                let namespace = attr_value(e, "namespace")?;
397                b.content = ComplexContentBuilder::Any { namespace };
398            }
399            "attribute" => {
400                on_attribute_inside_complex(e, b)?;
401            }
402            _ => {}
403        },
404    }
405
406    Ok(())
407}
408
409// ---------------------------------------------------------------------------
410// Event: End (closing tag)
411// ---------------------------------------------------------------------------
412
413/// Called for every closing tag.
414///
415/// The context stack only has `SimpleType` and `ComplexType` entries while we
416/// are inside those types.  We use `nesting_depth` to distinguish between end
417/// tags of children (decrement) vs. the end tag of the type itself (pop and emit).
418fn on_end(ctx_stack: &mut Vec<Context>, schema: &mut Schema) {
419    match ctx_stack.last_mut() {
420        Some(Context::SimpleType(ref mut b)) => {
421            if b.nesting_depth > 0 {
422                b.nesting_depth -= 1;
423                // Child's end tag — stay in context.
424                return;
425            }
426            // nesting_depth == 0 means this is </xs:simpleType> itself.
427        }
428        Some(Context::ComplexType(ref mut b)) => {
429            if b.nesting_depth > 0 {
430                b.nesting_depth -= 1;
431                return;
432            }
433            // nesting_depth == 0 means this is </xs:complexType> itself.
434        }
435        // Schema end tag — pop the Schema context.
436        Some(Context::Schema) => {
437            ctx_stack.pop();
438            return;
439        }
440        None => return,
441    }
442
443    // Pop the context and emit the finished type.
444    let ctx = ctx_stack.pop().expect("checked above");
445    match ctx {
446        Context::SimpleType(b) => {
447            schema.simple_types.push(SimpleType {
448                name: b.name,
449                restriction: Restriction {
450                    base: b.base,
451                    facets: b.facets,
452                },
453            });
454        }
455        Context::ComplexType(b) => {
456            let content = finish_complex_content(b.content);
457            schema.complex_types.push(ComplexType {
458                name: b.name,
459                content,
460            });
461        }
462        Context::Schema => unreachable!("handled above"),
463    }
464}
465
466// ---------------------------------------------------------------------------
467// Sub-handlers
468// ---------------------------------------------------------------------------
469
470/// Process a top-level `<xs:element name="…" type="…">` declaration.
471fn on_top_level_element(e: &BytesStart<'_>, schema: &mut Schema) -> Result<(), ParseError> {
472    if let (Some(name), Some(type_name)) = (attr_value(e, "name")?, attr_value(e, "type")?) {
473        schema.elements.push(Element { name, type_name });
474    }
475    Ok(())
476}
477
478/// Process an `<xs:element>` inside a `<xs:sequence>` or `<xs:choice>`.
479fn on_element_inside_complex(
480    e: &BytesStart<'_>,
481    b: &mut ComplexTypeBuilder,
482) -> Result<(), ParseError> {
483    match b.content {
484        ComplexContentBuilder::Sequence(ref mut seq) => {
485            if let (Some(name), Some(type_name)) = (attr_value(e, "name")?, attr_value(e, "type")?)
486            {
487                let min_occurs = parse_min_occurs(e)?;
488                let max_occurs = parse_max_occurs(e)?;
489                seq.push(SequenceElement {
490                    name,
491                    type_name,
492                    min_occurs,
493                    max_occurs,
494                });
495            }
496        }
497        ComplexContentBuilder::Choice(ref mut variants) => {
498            if let (Some(name), Some(type_name)) = (attr_value(e, "name")?, attr_value(e, "type")?)
499            {
500                variants.push(ChoiceVariant { name, type_name });
501            }
502        }
503        _ => {}
504    }
505    Ok(())
506}
507
508/// Process an `<xs:attribute>` inside a `<xs:extension>`.
509fn on_attribute_inside_complex(
510    e: &BytesStart<'_>,
511    b: &mut ComplexTypeBuilder,
512) -> Result<(), ParseError> {
513    if let ComplexContentBuilder::SimpleContent {
514        ref mut attributes,
515        in_extension,
516        ..
517    } = b.content
518    {
519        if in_extension {
520            if let (Some(name), Some(type_name)) = (attr_value(e, "name")?, attr_value(e, "type")?)
521            {
522                let required = attr_value(e, "use")?.is_some_and(|v| v == "required");
523                attributes.push(Attribute {
524                    name,
525                    type_name,
526                    required,
527                });
528            }
529        }
530    }
531    Ok(())
532}
533
534// ---------------------------------------------------------------------------
535// Attribute helpers
536// ---------------------------------------------------------------------------
537
538/// Return an attribute value as an owned `String`, or `None` if absent.
539fn attr_value(e: &BytesStart<'_>, name: &str) -> Result<Option<String>, ParseError> {
540    for attr in e.attributes().flatten() {
541        if local_name(attr.key.as_ref()) == name {
542            let val = std::str::from_utf8(attr.value.as_ref())?.to_owned();
543            return Ok(Some(val));
544        }
545    }
546    Ok(None)
547}
548
549/// Return an attribute value, returning `MissingAttribute` if absent.
550fn require_attr(
551    e: &BytesStart<'_>,
552    attr: &'static str,
553    element: &'static str,
554) -> Result<String, ParseError> {
555    attr_value(e, attr)?.ok_or(ParseError::MissingAttribute { element, attr })
556}
557
558/// Parse the `minOccurs` attribute, defaulting to `1`.
559fn parse_min_occurs(e: &BytesStart<'_>) -> Result<u32, ParseError> {
560    match attr_value(e, "minOccurs")? {
561        None => Ok(1),
562        Some(v) => parse_u32(&v, "minOccurs"),
563    }
564}
565
566/// Parse the `maxOccurs` attribute, defaulting to `Bounded(1)`.
567fn parse_max_occurs(e: &BytesStart<'_>) -> Result<MaxOccurs, ParseError> {
568    match attr_value(e, "maxOccurs")? {
569        None => Ok(MaxOccurs::Bounded(1)),
570        Some(ref v) if v == "unbounded" => Ok(MaxOccurs::Unbounded),
571        Some(v) => parse_u32(&v, "maxOccurs").map(MaxOccurs::Bounded),
572    }
573}
574
575// ---------------------------------------------------------------------------
576// Numeric parsing helpers
577// ---------------------------------------------------------------------------
578
579fn parse_u32(s: &str, attr: &'static str) -> Result<u32, ParseError> {
580    s.parse::<u32>()
581        .map_err(|_| ParseError::InvalidAttributeValue {
582            value: s.to_owned(),
583            attr,
584            reason: "expected non-negative integer",
585        })
586}
587
588fn parse_u64(s: &str, attr: &'static str) -> Result<u64, ParseError> {
589    s.parse::<u64>()
590        .map_err(|_| ParseError::InvalidAttributeValue {
591            value: s.to_owned(),
592            attr,
593            reason: "expected non-negative integer",
594        })
595}
596
597// ---------------------------------------------------------------------------
598// Misc helpers
599// ---------------------------------------------------------------------------
600
/// Returns the part of a qualified name after its last `:`
/// (e.g. `"xs:element"` → `"element"`).
///
/// Names without a prefix are returned unchanged; bytes that are not valid
/// UTF-8 yield the empty string.
fn local_name(name: &[u8]) -> &str {
    match std::str::from_utf8(name) {
        // `rsplit` always yields at least one piece, so the fallback is
        // never actually taken.
        Ok(qualified) => qualified.rsplit(':').next().unwrap_or(qualified),
        Err(_) => "",
    }
}
606
607/// Convert a [`ComplexContentBuilder`] into the final [`ComplexContent`].
608fn finish_complex_content(b: ComplexContentBuilder) -> ComplexContent {
609    match b {
610        ComplexContentBuilder::Sequence(seq) => ComplexContent::Sequence(seq),
611        ComplexContentBuilder::Choice(ch) => ComplexContent::Choice(ch),
612        ComplexContentBuilder::SimpleContent {
613            base, attributes, ..
614        } => ComplexContent::SimpleContent { base, attributes },
615        ComplexContentBuilder::Any { namespace } => ComplexContent::Any { namespace },
616        ComplexContentBuilder::Empty => ComplexContent::Any { namespace: None },
617    }
618}
619
620// ---------------------------------------------------------------------------
621// Tests
622// ---------------------------------------------------------------------------
623
624#[cfg(test)]
625mod tests {
626    use super::*;
627
628    // -----------------------------------------------------------------------
629    // Helpers
630    // -----------------------------------------------------------------------
631
632    fn wrap(body: &str) -> String {
633        format!(
634            r#"<?xml version="1.0" encoding="UTF-8"?>
635<xs:schema
636    xmlns:xs="http://www.w3.org/2001/XMLSchema"
637    targetNamespace="urn:test"
638    elementFormDefault="qualified">
639{body}
640</xs:schema>"#
641        )
642    }
643
644    // -----------------------------------------------------------------------
645    // Basic schema structure
646    // -----------------------------------------------------------------------
647
648    #[test]
649    fn empty_schema_parses() {
650        let xml = wrap("");
651        let schema = parse_str(&xml).unwrap();
652        assert_eq!(schema.target_namespace, "urn:test");
653        assert!(schema.elements.is_empty());
654        assert!(schema.simple_types.is_empty());
655        assert!(schema.complex_types.is_empty());
656    }
657
658    #[test]
659    fn missing_schema_root_errors() {
660        let err = parse_str("<root/>").unwrap_err();
661        assert!(matches!(err, ParseError::MissingSchemaRoot));
662    }
663
664    // -----------------------------------------------------------------------
665    // Top-level elements
666    // -----------------------------------------------------------------------
667
668    #[test]
669    fn top_level_element() {
670        let xml = wrap(r#"<xs:element name="Document" type="Document"/>"#);
671        let schema = parse_str(&xml).unwrap();
672        assert_eq!(schema.elements.len(), 1);
673        let el = &schema.elements[0];
674        assert_eq!(el.name, "Document");
675        assert_eq!(el.type_name, "Document");
676    }
677
678    #[test]
679    fn multiple_top_level_elements() {
680        let xml = wrap(
681            r#"
682            <xs:element name="AppHdr" type="BusinessApplicationHeaderV04"/>
683            <xs:element name="Document" type="Document"/>
684            "#,
685        );
686        let schema = parse_str(&xml).unwrap();
687        assert_eq!(schema.elements.len(), 2);
688        assert_eq!(schema.elements[0].name, "AppHdr");
689        assert_eq!(schema.elements[1].name, "Document");
690    }
691
692    // -----------------------------------------------------------------------
693    // Simple types
694    // -----------------------------------------------------------------------
695
696    #[test]
697    fn simple_type_string_min_max_length() {
698        let xml = wrap(
699            r#"
700            <xs:simpleType name="Max35Text">
701                <xs:restriction base="xs:string">
702                    <xs:minLength value="1"/>
703                    <xs:maxLength value="35"/>
704                </xs:restriction>
705            </xs:simpleType>
706            "#,
707        );
708        let schema = parse_str(&xml).unwrap();
709        assert_eq!(schema.simple_types.len(), 1);
710        let st = &schema.simple_types[0];
711        assert_eq!(st.name, "Max35Text");
712        assert_eq!(st.restriction.base, "xs:string");
713        assert_eq!(
714            st.restriction.facets,
715            vec![Facet::MinLength(1), Facet::MaxLength(35)]
716        );
717    }
718
719    #[test]
720    fn simple_type_enumeration() {
721        let xml = wrap(
722            r#"
723            <xs:simpleType name="AddressType2Code">
724                <xs:restriction base="xs:string">
725                    <xs:enumeration value="ADDR"/>
726                    <xs:enumeration value="PBOX"/>
727                    <xs:enumeration value="HOME"/>
728                </xs:restriction>
729            </xs:simpleType>
730            "#,
731        );
732        let schema = parse_str(&xml).unwrap();
733        let st = &schema.simple_types[0];
734        assert_eq!(
735            st.restriction.facets,
736            vec![
737                Facet::Enumeration("ADDR".into()),
738                Facet::Enumeration("PBOX".into()),
739                Facet::Enumeration("HOME".into()),
740            ]
741        );
742    }
743
744    #[test]
745    fn simple_type_pattern() {
746        let xml = wrap(
747            r#"
748            <xs:simpleType name="CountryCode">
749                <xs:restriction base="xs:string">
750                    <xs:pattern value="[A-Z]{2,2}"/>
751                </xs:restriction>
752            </xs:simpleType>
753            "#,
754        );
755        let schema = parse_str(&xml).unwrap();
756        let st = &schema.simple_types[0];
757        assert_eq!(
758            st.restriction.facets,
759            vec![Facet::Pattern("[A-Z]{2,2}".into())]
760        );
761    }
762
763    #[test]
764    fn simple_type_decimal_restriction() {
765        let xml = wrap(
766            r#"
767            <xs:simpleType name="ActiveCurrencyAndAmount_SimpleType">
768                <xs:restriction base="xs:decimal">
769                    <xs:fractionDigits value="5"/>
770                    <xs:totalDigits value="18"/>
771                    <xs:minInclusive value="0"/>
772                </xs:restriction>
773            </xs:simpleType>
774            "#,
775        );
776        let schema = parse_str(&xml).unwrap();
777        let st = &schema.simple_types[0];
778        assert_eq!(st.restriction.base, "xs:decimal");
779        assert_eq!(
780            st.restriction.facets,
781            vec![
782                Facet::FractionDigits(5),
783                Facet::TotalDigits(18),
784                Facet::MinInclusive("0".into()),
785            ]
786        );
787    }
788
789    #[test]
790    fn simple_type_boolean_no_facets() {
791        let xml = wrap(
792            r#"
793            <xs:simpleType name="YesNoIndicator">
794                <xs:restriction base="xs:boolean"/>
795            </xs:simpleType>
796            "#,
797        );
798        let schema = parse_str(&xml).unwrap();
799        let st = &schema.simple_types[0];
800        assert_eq!(st.name, "YesNoIndicator");
801        assert_eq!(st.restriction.base, "xs:boolean");
802        assert!(st.restriction.facets.is_empty());
803    }
804
805    #[test]
806    fn simple_type_empty_string_restriction() {
807        // <xs:restriction base="xs:string"/> with no facets — used for
808        // unconstrained code types like BusinessMessagePriorityCode.
809        let xml = wrap(
810            r#"
811            <xs:simpleType name="BusinessMessagePriorityCode">
812                <xs:restriction base="xs:string"/>
813            </xs:simpleType>
814            "#,
815        );
816        let schema = parse_str(&xml).unwrap();
817        let st = &schema.simple_types[0];
818        assert_eq!(st.restriction.base, "xs:string");
819        assert!(st.restriction.facets.is_empty());
820    }
821
822    #[test]
823    fn simple_type_date_restriction() {
824        let xml = wrap(
825            r#"
826            <xs:simpleType name="ISODate">
827                <xs:restriction base="xs:date"/>
828            </xs:simpleType>
829            "#,
830        );
831        let schema = parse_str(&xml).unwrap();
832        let st = &schema.simple_types[0];
833        assert_eq!(st.restriction.base, "xs:date");
834    }
835
836    // -----------------------------------------------------------------------
837    // Complex types — sequence
838    // -----------------------------------------------------------------------
839
840    #[test]
841    fn complex_type_sequence_required_optional() {
842        let xml = wrap(
843            r#"
844            <xs:complexType name="BranchData5">
845                <xs:sequence>
846                    <xs:element name="FinInstnId" type="FinancialInstitutionIdentification23"/>
847                    <xs:element maxOccurs="1" minOccurs="0" name="BrnchId" type="BranchData5"/>
848                </xs:sequence>
849            </xs:complexType>
850            "#,
851        );
852        let schema = parse_str(&xml).unwrap();
853        assert_eq!(schema.complex_types.len(), 1);
854        let ct = &schema.complex_types[0];
855        assert_eq!(ct.name, "BranchData5");
856
857        if let ComplexContent::Sequence(ref seq) = ct.content {
858            assert_eq!(seq.len(), 2);
859            assert_eq!(seq[0].name, "FinInstnId");
860            assert_eq!(seq[0].min_occurs, 1);
861            assert_eq!(seq[0].max_occurs, MaxOccurs::Bounded(1));
862
863            assert_eq!(seq[1].name, "BrnchId");
864            assert_eq!(seq[1].min_occurs, 0);
865            assert_eq!(seq[1].max_occurs, MaxOccurs::Bounded(1));
866        } else {
867            panic!("expected Sequence, got {:?}", ct.content);
868        }
869    }
870
871    #[test]
872    fn complex_type_sequence_unbounded() {
873        let xml = wrap(
874            r#"
875            <xs:complexType name="BusinessApplicationHeaderV04">
876                <xs:sequence>
877                    <xs:element name="Fr" type="Party51Choice"/>
878                    <xs:element maxOccurs="unbounded" minOccurs="0" name="Rltd" type="BusinessApplicationHeader8"/>
879                </xs:sequence>
880            </xs:complexType>
881            "#,
882        );
883        let schema = parse_str(&xml).unwrap();
884        let ct = &schema.complex_types[0];
885        if let ComplexContent::Sequence(ref seq) = ct.content {
886            assert_eq!(seq[1].max_occurs, MaxOccurs::Unbounded);
887        } else {
888            panic!("expected Sequence");
889        }
890    }
891
892    #[test]
893    fn complex_type_sequence_bounded_max() {
894        // AdrLine maxOccurs="7" from PostalAddress27
895        let xml = wrap(
896            r#"
897            <xs:complexType name="PostalAddress27">
898                <xs:sequence>
899                    <xs:element maxOccurs="7" minOccurs="0" name="AdrLine" type="Max70Text"/>
900                </xs:sequence>
901            </xs:complexType>
902            "#,
903        );
904        let schema = parse_str(&xml).unwrap();
905        let ct = &schema.complex_types[0];
906        if let ComplexContent::Sequence(ref seq) = ct.content {
907            assert_eq!(seq[0].max_occurs, MaxOccurs::Bounded(7));
908        } else {
909            panic!("expected Sequence");
910        }
911    }
912
913    // -----------------------------------------------------------------------
914    // Complex types — choice
915    // -----------------------------------------------------------------------
916
917    #[test]
918    fn complex_type_choice() {
919        let xml = wrap(
920            r#"
921            <xs:complexType name="Party51Choice">
922                <xs:choice>
923                    <xs:element name="OrgId" type="PartyIdentification272"/>
924                    <xs:element name="FIId" type="BranchAndFinancialInstitutionIdentification8"/>
925                </xs:choice>
926            </xs:complexType>
927            "#,
928        );
929        let schema = parse_str(&xml).unwrap();
930        let ct = &schema.complex_types[0];
931        assert_eq!(ct.name, "Party51Choice");
932
933        if let ComplexContent::Choice(ref variants) = ct.content {
934            assert_eq!(variants.len(), 2);
935            assert_eq!(variants[0].name, "OrgId");
936            assert_eq!(variants[0].type_name, "PartyIdentification272");
937            assert_eq!(variants[1].name, "FIId");
938        } else {
939            panic!("expected Choice, got {:?}", ct.content);
940        }
941    }
942
943    // -----------------------------------------------------------------------
944    // Complex types — simpleContent / extension
945    // -----------------------------------------------------------------------
946
947    #[test]
948    fn complex_type_simple_content() {
949        let xml = wrap(
950            r#"
951            <xs:complexType name="ActiveCurrencyAndAmount">
952                <xs:simpleContent>
953                    <xs:extension base="ActiveCurrencyAndAmount_SimpleType">
954                        <xs:attribute name="Ccy" type="ActiveCurrencyCode" use="required"/>
955                    </xs:extension>
956                </xs:simpleContent>
957            </xs:complexType>
958            "#,
959        );
960        let schema = parse_str(&xml).unwrap();
961        let ct = &schema.complex_types[0];
962        assert_eq!(ct.name, "ActiveCurrencyAndAmount");
963
964        if let ComplexContent::SimpleContent {
965            ref base,
966            ref attributes,
967        } = ct.content
968        {
969            assert_eq!(base, "ActiveCurrencyAndAmount_SimpleType");
970            assert_eq!(attributes.len(), 1);
971            assert_eq!(attributes[0].name, "Ccy");
972            assert_eq!(attributes[0].type_name, "ActiveCurrencyCode");
973            assert!(attributes[0].required);
974        } else {
975            panic!("expected SimpleContent, got {:?}", ct.content);
976        }
977    }
978
979    #[test]
980    fn simple_content_optional_attribute() {
981        let xml = wrap(
982            r#"
983            <xs:complexType name="FooAmount">
984                <xs:simpleContent>
985                    <xs:extension base="FooAmount_SimpleType">
986                        <xs:attribute name="Ccy" type="CurrencyCode" use="optional"/>
987                    </xs:extension>
988                </xs:simpleContent>
989            </xs:complexType>
990            "#,
991        );
992        let schema = parse_str(&xml).unwrap();
993        let ct = &schema.complex_types[0];
994        if let ComplexContent::SimpleContent { ref attributes, .. } = ct.content {
995            assert!(!attributes[0].required);
996        } else {
997            panic!("expected SimpleContent");
998        }
999    }
1000
1001    // -----------------------------------------------------------------------
1002    // Complex types — xs:any
1003    // -----------------------------------------------------------------------
1004
1005    #[test]
1006    fn complex_type_any_with_namespace() {
1007        let xml = wrap(
1008            r#"
1009            <xs:complexType name="SignatureEnvelope">
1010                <xs:sequence>
1011                    <xs:any namespace="http://www.w3.org/2000/09/xmldsig#" processContents="lax"/>
1012                </xs:sequence>
1013            </xs:complexType>
1014            "#,
1015        );
1016        let schema = parse_str(&xml).unwrap();
1017        let ct = &schema.complex_types[0];
1018        if let ComplexContent::Any { ref namespace } = ct.content {
1019            assert_eq!(
1020                namespace.as_deref(),
1021                Some("http://www.w3.org/2000/09/xmldsig#")
1022            );
1023        } else {
1024            panic!("expected Any, got {:?}", ct.content);
1025        }
1026    }
1027
1028    // -----------------------------------------------------------------------
1029    // Mixed schema (multiple types)
1030    // -----------------------------------------------------------------------
1031
1032    #[test]
1033    fn mixed_schema_counts() {
1034        let xml = wrap(
1035            r#"
1036            <xs:element name="AppHdr" type="BusinessApplicationHeaderV04"/>
1037            <xs:simpleType name="Max35Text">
1038                <xs:restriction base="xs:string">
1039                    <xs:minLength value="1"/>
1040                    <xs:maxLength value="35"/>
1041                </xs:restriction>
1042            </xs:simpleType>
1043            <xs:complexType name="BranchData5">
1044                <xs:sequence>
1045                    <xs:element maxOccurs="1" minOccurs="0" name="Id" type="Max35Text"/>
1046                </xs:sequence>
1047            </xs:complexType>
1048            "#,
1049        );
1050        let schema = parse_str(&xml).unwrap();
1051        assert_eq!(schema.elements.len(), 1);
1052        assert_eq!(schema.simple_types.len(), 1);
1053        assert_eq!(schema.complex_types.len(), 1);
1054    }
1055
1056    // -----------------------------------------------------------------------
1057    // Real schema: head.001.001.04.xsd
1058    // -----------------------------------------------------------------------
1059
1060    #[test]
1061    fn parse_head_001_001_04() {
1062        let path = concat!(
1063            env!("CARGO_MANIFEST_DIR"),
1064            "/../../schemas/head/head.001.001.04.xsd"
1065        );
1066        let file = std::fs::File::open(path)
1067            .expect("head.001.001.04.xsd not found — run scripts/download-schemas.sh");
1068        let schema = parse(std::io::BufReader::new(file)).unwrap();
1069
1070        assert_eq!(
1071            schema.target_namespace,
1072            "urn:iso:std:iso:20022:tech:xsd:head.001.001.04"
1073        );
1074
1075        // Must have at least one top-level element.
1076        assert!(!schema.elements.is_empty(), "no top-level elements parsed");
1077
1078        let app_hdr = schema.elements.iter().find(|e| e.name == "AppHdr");
1079        assert!(app_hdr.is_some(), "AppHdr element not found");
1080        assert_eq!(app_hdr.unwrap().type_name, "BusinessApplicationHeaderV04");
1081
1082        // Spot-check simple types.
1083        assert!(
1084            schema.simple_types.iter().any(|s| s.name == "Max35Text"),
1085            "Max35Text not found"
1086        );
1087        let max35 = schema
1088            .simple_types
1089            .iter()
1090            .find(|s| s.name == "Max35Text")
1091            .unwrap();
1092        assert!(max35.restriction.facets.contains(&Facet::MinLength(1)));
1093        assert!(max35.restriction.facets.contains(&Facet::MaxLength(35)));
1094
1095        let country = schema
1096            .simple_types
1097            .iter()
1098            .find(|s| s.name == "CountryCode")
1099            .unwrap();
1100        assert!(country
1101            .restriction
1102            .facets
1103            .iter()
1104            .any(|f| matches!(f, Facet::Pattern(_))));
1105
1106        let yes_no = schema
1107            .simple_types
1108            .iter()
1109            .find(|s| s.name == "YesNoIndicator")
1110            .unwrap();
1111        assert_eq!(yes_no.restriction.base, "xs:boolean");
1112
1113        // Spot-check complex types.
1114        let party51 = schema
1115            .complex_types
1116            .iter()
1117            .find(|c| c.name == "Party51Choice")
1118            .unwrap();
1119        assert!(
1120            matches!(party51.content, ComplexContent::Choice(_)),
1121            "Party51Choice should be Choice"
1122        );
1123
1124        let sig_env = schema
1125            .complex_types
1126            .iter()
1127            .find(|c| c.name == "SignatureEnvelope")
1128            .unwrap();
1129        assert!(
1130            matches!(sig_env.content, ComplexContent::Any { .. }),
1131            "SignatureEnvelope should be Any"
1132        );
1133
1134        let branch = schema
1135            .complex_types
1136            .iter()
1137            .find(|c| c.name == "BranchData5")
1138            .unwrap();
1139        if let ComplexContent::Sequence(ref seq) = branch.content {
1140            assert!(!seq.is_empty());
1141        } else {
1142            panic!("BranchData5 should be Sequence");
1143        }
1144    }
1145
1146    // -----------------------------------------------------------------------
1147    // Real schema: pacs.008.001.10.xsd
1148    // -----------------------------------------------------------------------
1149
1150    #[test]
1151    fn parse_pacs_008_001_10() {
1152        let path = concat!(
1153            env!("CARGO_MANIFEST_DIR"),
1154            "/../../schemas/pacs/pacs.008.001.10.xsd"
1155        );
1156        let file = match std::fs::File::open(path) {
1157            Ok(f) => f,
1158            // Schema file may not be present in CI — skip gracefully.
1159            Err(_) => return,
1160        };
1161        let schema = parse(std::io::BufReader::new(file)).unwrap();
1162
1163        assert_eq!(
1164            schema.target_namespace,
1165            "urn:iso:std:iso:20022:tech:xsd:pacs.008.001.10"
1166        );
1167
1168        // ActiveCurrencyAndAmount_SimpleType uses xs:decimal with facets.
1169        let decimal_st = schema
1170            .simple_types
1171            .iter()
1172            .find(|s| s.name == "ActiveCurrencyAndAmount_SimpleType")
1173            .unwrap();
1174        assert_eq!(decimal_st.restriction.base, "xs:decimal");
1175        assert!(decimal_st
1176            .restriction
1177            .facets
1178            .contains(&Facet::FractionDigits(5)));
1179        assert!(decimal_st
1180            .restriction
1181            .facets
1182            .contains(&Facet::TotalDigits(18)));
1183        assert!(decimal_st
1184            .restriction
1185            .facets
1186            .contains(&Facet::MinInclusive("0".into())));
1187
1188        // ActiveCurrencyAndAmount is a simpleContent type.
1189        let amount_ct = schema
1190            .complex_types
1191            .iter()
1192            .find(|c| c.name == "ActiveCurrencyAndAmount")
1193            .unwrap();
1194        if let ComplexContent::SimpleContent {
1195            ref base,
1196            ref attributes,
1197        } = amount_ct.content
1198        {
1199            assert_eq!(base, "ActiveCurrencyAndAmount_SimpleType");
1200            assert_eq!(attributes.len(), 1);
1201            assert_eq!(attributes[0].name, "Ccy");
1202            assert!(attributes[0].required);
1203        } else {
1204            panic!(
1205                "ActiveCurrencyAndAmount should be SimpleContent, got {:?}",
1206                amount_ct.content
1207            );
1208        }
1209
1210        // Document element must be present.
1211        assert!(schema.elements.iter().any(|e| e.name == "Document"));
1212    }
1213}