Skip to main content

xsd_schema/validation/
quick_xml_driver.rs

1//! Reusable driver that wires a [`quick_xml::Reader`] event stream into a
2//! [`ValidationRuntime`].
3//!
4//! Two layers are provided:
5//!
6//! * [`drive_quick_xml`] / [`drive_quick_xml_in`] — turn-key, all you want is
7//!   for the runtime's [`ValidationSink`] to receive every diagnostic. The
8//!   helper calls [`ValidationRuntime::end_validation`] for you.
9//! * [`drive_quick_xml_with`] / [`drive_quick_xml_with_in`] — callback-driven,
10//!   for callers that need to interleave work between validator events
11//!   (typed-document construction, source-span tracking, etc). The caller is
12//!   responsible for `end_validation` on this path; see the
13//!   [`ValidationEventHandler`] trait for hook ordering.
14//!
15//! DTD-related events ([`Event::DocType`], [`Event::Decl`]) are silently
16//! dropped. Comments and processing instructions flow through the layer-2
17//! hooks; layer 1 ignores them via [`NoopHandler`].
18//!
19//! Namespace scoping (xmlns push/pop), `xsi:type`/`xsi:nil` discovery, and
20//! [`NamespaceContextSnapshot`] construction are handled internally so the
21//! caller does not have to.
22
23use std::collections::HashMap;
24use std::convert::Infallible;
25use std::io::BufRead;
26
27use quick_xml::events::{BytesStart, Event};
28use quick_xml::Reader;
29use thiserror::Error;
30
31use crate::namespace::context::NamespaceContextSnapshot;
32use crate::namespace::table::{XML_NAMESPACE, XSI_NAMESPACE};
33use crate::schema::SchemaSet;
34use crate::validation::errors::ValidationError;
35use crate::validation::info::{SchemaInfo, SchemaValidity};
36use crate::validation::runtime::ValidationRuntime;
37use crate::validation::validator::ValidationSink;
38
39// ── Public types ──────────────────────────────────────────────────────────
40
41/// Final outcome of a successful drive call.
42///
43/// Validation diagnostics are reported through the runtime's
44/// [`ValidationSink`], not through this struct.
45#[derive(Debug, Clone)]
46pub struct DriveOutcome {
47    /// Validity of the root element after `validate_end_element`.
48    /// `None` if the stream contained no elements.
49    pub root_validity: Option<SchemaValidity>,
50    /// Maximum element depth observed.
51    pub max_depth: usize,
52}
53
54/// Errors raised by the layer-1 helpers.
55#[derive(Debug, Error)]
56pub enum DriveError {
57    #[error("xml parse error: {0}")]
58    Parse(#[from] quick_xml::Error),
59    #[error("utf-8 error: {0}")]
60    Utf8(#[from] std::str::Utf8Error),
61    #[error("unbound prefix '{0}'")]
62    UnboundPrefix(String),
63    /// Stream ended with `depth > 0` (open elements not closed).
64    #[error("unexpected eof: {depth} element(s) still open")]
65    UnexpectedEof { depth: usize },
66    /// `runtime.end_validation()` returned `Err` after the stream was driven.
67    #[error("end_validation failed: {0}")]
68    Validation(ValidationError),
69}
70
71/// Errors raised by the layer-2 helpers.
72#[derive(Debug, Error)]
73pub enum DriveWithError<E> {
74    #[error("xml parse error: {0}")]
75    Parse(quick_xml::Error),
76    #[error("utf-8 error: {0}")]
77    Utf8(std::str::Utf8Error),
78    #[error("unbound prefix '{0}'")]
79    UnboundPrefix(String),
80    #[error("unexpected eof: {depth} element(s) still open")]
81    UnexpectedEof { depth: usize },
82    /// A handler hook returned an error.
83    #[error("hook error")]
84    Hook(E),
85}
86
87/// View of an element-start (or empty) event passed to handler hooks.
88///
89/// Borrowed slices live until the next `read_event_into` call, so hooks must
90/// not retain references past their return.
91#[derive(Clone, Copy)]
92pub struct ElementStartView<'a> {
93    pub local_name: &'a str,
94    pub namespace_uri: &'a str,
95    pub prefix: &'a str,
96    /// Lexical value of `xsi:type`, if present.
97    pub xsi_type: Option<&'a str>,
98    /// Lexical value of `xsi:nil`, if present.
99    pub xsi_nil: Option<&'a str>,
100    /// Pre-built snapshot used for QName resolution inside the runtime.
101    pub ns_context: &'a NamespaceContextSnapshot,
102    /// xmlns declarations on THIS element (prefix, uri). Empty prefix is the
103    /// default-namespace declaration.
104    pub namespace_decls: &'a [(&'a str, &'a str)],
105    /// `true` if this came from `Event::Empty`.
106    pub is_empty: bool,
107}
108
/// Read-only description of one attribute that is not an xmlns declaration.
#[derive(Clone, Copy)]
pub struct AttributeView<'a> {
    /// Attribute name without its prefix.
    pub local_name: &'a str,
    /// Namespace the prefix resolved to; empty for an unprefixed attribute.
    pub namespace_uri: &'a str,
    /// Prefix as written in the document; empty when there is none.
    pub prefix: &'a str,
    /// Attribute value after quick-xml has unescaped it (entity and
    /// character references are already resolved).
    pub value: &'a str,
}
118
119/// Payload for [`ValidationEventHandler::after_end_of_attributes`].
120pub struct EndOfAttributesView<'a> {
121    pub info: &'a SchemaInfo,
122    /// Drained `take_deferred_attribute_results()` payload, in original
123    /// attribute encounter order. Empty whenever no CTA reselection occurred.
124    #[cfg(feature = "xsd11")]
125    pub deferred_attribute_results: &'a [SchemaInfo],
126}
127
128/// What the runtime returned from `validate_end_element`.
129#[derive(Debug, Clone, Copy)]
130pub struct EndElementInfo {
131    pub validity: SchemaValidity,
132}
133
/// Classification of a character-data event, telling the handler which
/// runtime entry point the driver used for it.
#[derive(Debug, Clone, Copy)]
pub enum TextKind {
    /// An `Event::Text` whose unescaped content was whitespace-only; it was
    /// passed to `runtime.validate_whitespace`.
    Whitespace,
    /// An `Event::Text` containing at least one non-whitespace character; it
    /// was passed to `runtime.validate_text`.
    Character,
    /// An `Event::CData` section; these always go to
    /// `runtime.validate_text`, whatever their content.
    CData,
}
146
147// ── Handler trait ─────────────────────────────────────────────────────────
148
149/// Handler invoked at each validator-event boundary.
150///
151/// Every method has a default implementation that does nothing, so a
152/// handler that only cares about (say) end-of-element fires exactly that
153/// one method.
154///
155/// Hook ordering for one element:
156/// 1. `on_element_start_offset`
157/// 2. `before_element`
158/// 3. (internal) `runtime.validate_element`
159/// 4. `after_element`
160/// 5. For each non-xmlns attribute, in document order:
161///    1. `before_attribute`
162///    2. (internal) `runtime.validate_attribute`
163///    3. `after_attribute`
164/// 6. (internal) `runtime.validate_end_of_attributes`
165/// 7. (internal, xsd11) `runtime.take_deferred_attribute_results`
166/// 8. `after_end_of_attributes`
167/// 9. Body events: `on_text`, `on_comment`, `on_processing_instruction`.
168/// 10. On the closing event:
169///     1. (internal) `runtime.validate_end_element`
170///     2. `after_end_element`
171///     3. `on_element_end_offset`
172pub trait ValidationEventHandler {
173    /// Caller's hook-error type. Reported through
174    /// [`DriveWithError::Hook`].
175    type Error;
176
177    fn before_element(&mut self, _view: ElementStartView<'_>) -> Result<(), Self::Error> {
178        Ok(())
179    }
180
181    fn after_element(
182        &mut self,
183        _view: ElementStartView<'_>,
184        _info: &SchemaInfo,
185    ) -> Result<(), Self::Error> {
186        Ok(())
187    }
188
189    fn before_attribute(&mut self, _view: AttributeView<'_>) -> Result<(), Self::Error> {
190        Ok(())
191    }
192
193    fn after_attribute(
194        &mut self,
195        _view: AttributeView<'_>,
196        _info: &SchemaInfo,
197    ) -> Result<(), Self::Error> {
198        Ok(())
199    }
200
201    fn after_end_of_attributes(
202        &mut self,
203        _view: EndOfAttributesView<'_>,
204    ) -> Result<(), Self::Error> {
205        Ok(())
206    }
207
208    /// `depth` is the depth at which the element existed (1 = root close).
209    fn after_end_element(
210        &mut self,
211        _info: &EndElementInfo,
212        _depth: usize,
213    ) -> Result<(), Self::Error> {
214        Ok(())
215    }
216
217    fn on_text(&mut self, _kind: TextKind, _text: &str) -> Result<(), Self::Error> {
218        Ok(())
219    }
220
221    fn on_comment(&mut self, _text: &str) -> Result<(), Self::Error> {
222        Ok(())
223    }
224
225    fn on_processing_instruction(
226        &mut self,
227        _target: &str,
228        _data: &str,
229    ) -> Result<(), Self::Error> {
230        Ok(())
231    }
232
233    /// Buffer offset of the `<` for the element about to be reported to
234    /// `before_element`. Default impl is a no-op; override only when
235    /// building a span-aware DOM.
236    fn on_element_start_offset(&mut self, _byte_pos: usize) -> Result<(), Self::Error> {
237        Ok(())
238    }
239
240    /// Buffer offset just past the `>` of the closing tag. Default impl is a
241    /// no-op; override only when building a span-aware DOM.
242    fn on_element_end_offset(&mut self, _byte_pos: usize) -> Result<(), Self::Error> {
243        Ok(())
244    }
245}
246
247/// Zero-sized handler whose every method is the trait default.
248#[derive(Debug, Default, Clone, Copy)]
249pub struct NoopHandler;
250
251impl ValidationEventHandler for NoopHandler {
252    type Error = Infallible;
253}
254
255// ── Layer 1: turn-key ─────────────────────────────────────────────────────
256
257/// Drive a quick-xml stream into `runtime`, then call
258/// `runtime.end_validation()`.
259///
260/// Validation diagnostics arrive through the sink the runtime was built with.
261/// DTD events are silently dropped. Comments and PIs are dropped.
262pub fn drive_quick_xml<R, S>(
263    reader: R,
264    runtime: &mut ValidationRuntime<'_, S>,
265    schema_set: &SchemaSet,
266) -> Result<DriveOutcome, DriveError>
267where
268    R: BufRead,
269    S: ValidationSink,
270{
271    let mut buf = Vec::new();
272    drive_quick_xml_in(reader, runtime, schema_set, &mut buf)
273}
274
275/// [`drive_quick_xml`] variant that reuses a caller-supplied buffer.
276pub fn drive_quick_xml_in<R, S>(
277    reader: R,
278    runtime: &mut ValidationRuntime<'_, S>,
279    schema_set: &SchemaSet,
280    buf: &mut Vec<u8>,
281) -> Result<DriveOutcome, DriveError>
282where
283    R: BufRead,
284    S: ValidationSink,
285{
286    let mut handler = NoopHandler;
287    let outcome =
288        drive_quick_xml_with_in(reader, runtime, schema_set, &mut handler, buf).map_err(
289            |e| match e {
290                DriveWithError::Parse(e) => DriveError::Parse(e),
291                DriveWithError::Utf8(e) => DriveError::Utf8(e),
292                DriveWithError::UnboundPrefix(p) => DriveError::UnboundPrefix(p),
293                DriveWithError::UnexpectedEof { depth } => DriveError::UnexpectedEof { depth },
294                DriveWithError::Hook(_) => unreachable!("NoopHandler is infallible"),
295            },
296        )?;
297    runtime.end_validation().map_err(DriveError::Validation)?;
298    Ok(outcome)
299}
300
301// ── Layer 2: handler-driven ───────────────────────────────────────────────
302
303/// Drive a quick-xml stream into `runtime`, invoking `handler` at each
304/// validator-event boundary.
305///
306/// **Does NOT call `runtime.end_validation()`.** The caller must do so after
307/// any post-stream state collection (e.g. `runtime.schema_location_hints()`).
308pub fn drive_quick_xml_with<R, S, H>(
309    reader: R,
310    runtime: &mut ValidationRuntime<'_, S>,
311    schema_set: &SchemaSet,
312    handler: &mut H,
313) -> Result<DriveOutcome, DriveWithError<H::Error>>
314where
315    R: BufRead,
316    S: ValidationSink,
317    H: ValidationEventHandler,
318{
319    let mut buf = Vec::new();
320    drive_quick_xml_with_in(reader, runtime, schema_set, handler, &mut buf)
321}
322
323/// [`drive_quick_xml_with`] variant that reuses a caller-supplied buffer.
324pub fn drive_quick_xml_with_in<R, S, H>(
325    reader: R,
326    runtime: &mut ValidationRuntime<'_, S>,
327    schema_set: &SchemaSet,
328    handler: &mut H,
329    buf: &mut Vec<u8>,
330) -> Result<DriveOutcome, DriveWithError<H::Error>>
331where
332    R: BufRead,
333    S: ValidationSink,
334    H: ValidationEventHandler,
335{
336    let mut xml_reader = Reader::from_reader(reader);
337    xml_reader.trim_text(false);
338
339    // prefix bytes -> stack of URI strings (top-of-stack = current binding)
340    let mut prefix_map: HashMap<Vec<u8>, Vec<String>> = HashMap::new();
341    // Always-in-scope xml prefix (XML Namespaces §3).
342    prefix_map
343        .entry(b"xml".to_vec())
344        .or_default()
345        .push(XML_NAMESPACE.to_string());
346    // Default-namespace seed; explicit declarations overwrite the top-of-stack.
347    prefix_map.entry(Vec::new()).or_default().push(String::new());
348
349    // Per-element list of prefixes that need popping at end-of-element.
350    let mut scope_stack: Vec<Vec<Vec<u8>>> = Vec::new();
351
352    let mut depth: usize = 0;
353    let mut max_depth: usize = 0;
354    let mut root_validity: Option<SchemaValidity> = None;
355
356    buf.clear();
357
358    loop {
359        let event_start = xml_reader.buffer_position();
360        match xml_reader.read_event_into(buf) {
361            Ok(Event::Start(ref e)) => {
362                handle_start_or_empty(
363                    e,
364                    false,
365                    event_start,
366                    &mut xml_reader,
367                    runtime,
368                    schema_set,
369                    handler,
370                    &mut prefix_map,
371                    &mut scope_stack,
372                    &mut depth,
373                    &mut max_depth,
374                    &mut root_validity,
375                )?;
376            }
377            Ok(Event::Empty(ref e)) => {
378                handle_start_or_empty(
379                    e,
380                    true,
381                    event_start,
382                    &mut xml_reader,
383                    runtime,
384                    schema_set,
385                    handler,
386                    &mut prefix_map,
387                    &mut scope_stack,
388                    &mut depth,
389                    &mut max_depth,
390                    &mut root_validity,
391                )?;
392            }
393            Ok(Event::End(_)) => {
394                let end_info = runtime.validate_end_element();
395                let end = EndElementInfo {
396                    validity: end_info.validity,
397                };
398                handler
399                    .after_end_element(&end, depth)
400                    .map_err(DriveWithError::Hook)?;
401                let end_pos = xml_reader.buffer_position();
402                handler
403                    .on_element_end_offset(end_pos)
404                    .map_err(DriveWithError::Hook)?;
405                if depth == 1 {
406                    root_validity = Some(end_info.validity);
407                }
408                pop_xmlns_scope(&mut prefix_map, &mut scope_stack);
409                depth = depth.saturating_sub(1);
410            }
411            Ok(Event::Text(ref e)) if depth > 0 => {
412                let text = e.unescape().map_err(DriveWithError::Parse)?;
413                if text.chars().all(|c| c.is_whitespace()) {
414                    runtime.validate_whitespace(&text);
415                    handler
416                        .on_text(TextKind::Whitespace, &text)
417                        .map_err(DriveWithError::Hook)?;
418                } else {
419                    runtime.validate_text(&text);
420                    handler
421                        .on_text(TextKind::Character, &text)
422                        .map_err(DriveWithError::Hook)?;
423                }
424            }
425            Ok(Event::CData(ref e)) if depth > 0 => {
426                let s = std::str::from_utf8(e.as_ref()).map_err(DriveWithError::Utf8)?;
427                runtime.validate_text(s);
428                handler
429                    .on_text(TextKind::CData, s)
430                    .map_err(DriveWithError::Hook)?;
431            }
432            Ok(Event::Text(_) | Event::CData(_)) => {
433                // Outside any element — significant for neither validator nor handler.
434            }
435            Ok(Event::Comment(ref e)) => {
436                let s = std::str::from_utf8(e.as_ref()).map_err(DriveWithError::Utf8)?;
437                handler.on_comment(s).map_err(DriveWithError::Hook)?;
438            }
439            Ok(Event::PI(ref e)) => {
440                let raw = std::str::from_utf8(e.as_ref()).map_err(DriveWithError::Utf8)?;
441                let (target, data) = parse_pi_content(raw);
442                handler
443                    .on_processing_instruction(target, data)
444                    .map_err(DriveWithError::Hook)?;
445            }
446            Ok(Event::Decl(_) | Event::DocType(_)) => {}
447            Ok(Event::Eof) => {
448                if depth != 0 {
449                    return Err(DriveWithError::UnexpectedEof { depth });
450                }
451                break;
452            }
453            Err(e) => return Err(DriveWithError::Parse(e)),
454        }
455        buf.clear();
456    }
457
458    Ok(DriveOutcome {
459        root_validity,
460        max_depth,
461    })
462}
463
464// ── Internals ─────────────────────────────────────────────────────────────
465
466#[allow(clippy::too_many_arguments)]
467fn handle_start_or_empty<R, S, H>(
468    e: &BytesStart<'_>,
469    is_empty: bool,
470    event_start: usize,
471    xml_reader: &mut Reader<R>,
472    runtime: &mut ValidationRuntime<'_, S>,
473    schema_set: &SchemaSet,
474    handler: &mut H,
475    prefix_map: &mut HashMap<Vec<u8>, Vec<String>>,
476    scope_stack: &mut Vec<Vec<Vec<u8>>>,
477    depth: &mut usize,
478    max_depth: &mut usize,
479    root_validity: &mut Option<SchemaValidity>,
480) -> Result<(), DriveWithError<H::Error>>
481where
482    R: BufRead,
483    S: ValidationSink,
484    H: ValidationEventHandler,
485{
486    *depth += 1;
487    if *depth > *max_depth {
488        *max_depth = *depth;
489    }
490
491    // 1. Push xmlns scope, collect declarations as (prefix_string, uri_string).
492    let ns_decls_owned = match push_xmlns_scope(e, prefix_map, scope_stack) {
493        Ok(v) => v,
494        Err(err) => {
495            // Scope was not pushed, so just unwind depth.
496            *depth -= 1;
497            return Err(err);
498        }
499    };
500    let ns_decls: Vec<(&str, &str)> = ns_decls_owned
501        .iter()
502        .map(|(p, u)| (p.as_str(), u.as_str()))
503        .collect();
504
505    // 2. Resolve element name + scan xsi:type / xsi:nil.
506    let (local_name, namespace_uri, elem_prefix) = match resolve_element_qname(e, prefix_map) {
507        Ok(v) => v,
508        Err(err) => {
509            pop_xmlns_scope(prefix_map, scope_stack);
510            *depth -= 1;
511            return Err(err);
512        }
513    };
514    let (xsi_type, xsi_nil) = match scan_xsi_attributes(e, prefix_map) {
515        Ok(v) => v,
516        Err(err) => {
517            pop_xmlns_scope(prefix_map, scope_stack);
518            *depth -= 1;
519            return Err(err);
520        }
521    };
522
523    let ns_ctx = build_ns_context(prefix_map, schema_set);
524
525    let view = ElementStartView {
526        local_name: &local_name,
527        namespace_uri: &namespace_uri,
528        prefix: &elem_prefix,
529        xsi_type: xsi_type.as_deref(),
530        xsi_nil: xsi_nil.as_deref(),
531        ns_context: &ns_ctx,
532        namespace_decls: &ns_decls,
533        is_empty,
534    };
535
536    // 3. Spans + before_element.
537    if let Err(err) = handler.on_element_start_offset(event_start) {
538        pop_xmlns_scope(prefix_map, scope_stack);
539        *depth -= 1;
540        return Err(DriveWithError::Hook(err));
541    }
542    if let Err(err) = handler.before_element(view) {
543        pop_xmlns_scope(prefix_map, scope_stack);
544        *depth -= 1;
545        return Err(DriveWithError::Hook(err));
546    }
547
548    // 4. Element validation + after_element.
549    let info = runtime.validate_element(
550        &local_name,
551        &namespace_uri,
552        xsi_type.as_deref(),
553        xsi_nil.as_deref(),
554        &ns_ctx,
555    );
556    handler
557        .after_element(view, &info)
558        .map_err(DriveWithError::Hook)?;
559
560    // 5. Attributes.
561    for attr_result in e.attributes() {
562        let attr = attr_result.map_err(|err| DriveWithError::Parse(err.into()))?;
563        let key = attr.key.as_ref();
564        if key == b"xmlns" || key.starts_with(b"xmlns:") {
565            continue;
566        }
567        let (prefix_bytes, local_bytes) = split_prefix_local(key);
568        let attr_local = std::str::from_utf8(local_bytes).map_err(DriveWithError::Utf8)?;
569        let attr_prefix = std::str::from_utf8(prefix_bytes).map_err(DriveWithError::Utf8)?;
570        let attr_ns = if prefix_bytes.is_empty() {
571            String::new()
572        } else {
573            match prefix_map
574                .get(prefix_bytes)
575                .and_then(|stack| stack.last())
576            {
577                Some(uri) => uri.clone(),
578                None => return Err(DriveWithError::UnboundPrefix(attr_prefix.to_string())),
579            }
580        };
581        let value = attr
582            .unescape_value()
583            .map_err(DriveWithError::Parse)?;
584
585        let av = AttributeView {
586            local_name: attr_local,
587            namespace_uri: &attr_ns,
588            prefix: attr_prefix,
589            value: value.as_ref(),
590        };
591        handler.before_attribute(av).map_err(DriveWithError::Hook)?;
592        let attr_info = runtime.validate_attribute(attr_local, &attr_ns, value.as_ref());
593        handler
594            .after_attribute(av, &attr_info)
595            .map_err(DriveWithError::Hook)?;
596    }
597
598    // 6. End-of-attributes.
599    let eoa_info = runtime.validate_end_of_attributes();
600    #[cfg(feature = "xsd11")]
601    let deferred = runtime.take_deferred_attribute_results();
602    let eoa_view = EndOfAttributesView {
603        info: &eoa_info,
604        #[cfg(feature = "xsd11")]
605        deferred_attribute_results: &deferred,
606    };
607    handler
608        .after_end_of_attributes(eoa_view)
609        .map_err(DriveWithError::Hook)?;
610
611    // 7. For empty elements, close inline.
612    if is_empty {
613        let end_info = runtime.validate_end_element();
614        let end = EndElementInfo {
615            validity: end_info.validity,
616        };
617        handler
618            .after_end_element(&end, *depth)
619            .map_err(DriveWithError::Hook)?;
620        let end_pos = xml_reader.buffer_position();
621        handler
622            .on_element_end_offset(end_pos)
623            .map_err(DriveWithError::Hook)?;
624        if *depth == 1 {
625            *root_validity = Some(end_info.validity);
626        }
627        pop_xmlns_scope(prefix_map, scope_stack);
628        *depth -= 1;
629    }
630
631    Ok(())
632}
633
634/// Collect xmlns / xmlns:* declarations on `e` into `prefix_map`. Returns
635/// the (prefix, uri) declarations so they can be exposed to handlers.
636fn push_xmlns_scope<E>(
637    e: &BytesStart<'_>,
638    prefix_map: &mut HashMap<Vec<u8>, Vec<String>>,
639    scope_stack: &mut Vec<Vec<Vec<u8>>>,
640) -> Result<Vec<(String, String)>, DriveWithError<E>> {
641    let mut declared: Vec<Vec<u8>> = Vec::new();
642    let mut decls_owned: Vec<(String, String)> = Vec::new();
643
644    for attr_result in e.attributes() {
645        let attr = attr_result.map_err(|err| DriveWithError::Parse(err.into()))?;
646        let key = attr.key.as_ref();
647        let (prefix_bytes, prefix_str) = if key == b"xmlns" {
648            (Vec::new(), String::new())
649        } else if let Some(rest) = key.strip_prefix(b"xmlns:") {
650            let prefix_str = std::str::from_utf8(rest)
651                .map_err(DriveWithError::Utf8)?
652                .to_string();
653            (rest.to_vec(), prefix_str)
654        } else {
655            continue;
656        };
657        let value = attr.unescape_value().map_err(DriveWithError::Parse)?;
658        let uri = value.into_owned();
659        prefix_map
660            .entry(prefix_bytes.clone())
661            .or_default()
662            .push(uri.clone());
663        declared.push(prefix_bytes);
664        decls_owned.push((prefix_str, uri));
665    }
666
667    scope_stack.push(declared);
668    Ok(decls_owned)
669}
670
/// Close the innermost namespace scope.
///
/// Pops the top frame of `scope_stack` and, for each prefix listed in it,
/// removes the most recent binding from that prefix's stack in
/// `prefix_map`; a prefix whose stack becomes empty is dropped from the map
/// altogether. Does nothing when `scope_stack` is empty, and silently skips
/// prefixes that are not present in the map.
fn pop_xmlns_scope(
    prefix_map: &mut HashMap<Vec<u8>, Vec<String>>,
    scope_stack: &mut Vec<Vec<Vec<u8>>>,
) {
    let frame = match scope_stack.pop() {
        Some(frame) => frame,
        None => return,
    };

    for prefix in frame {
        // Only existing entries are touched; a vacant entry is left vacant.
        if let std::collections::hash_map::Entry::Occupied(mut bindings) =
            prefix_map.entry(prefix)
        {
            bindings.get_mut().pop();
            if bindings.get().is_empty() {
                bindings.remove();
            }
        }
    }
}
686
687fn resolve_element_qname<E>(
688    e: &BytesStart<'_>,
689    prefix_map: &HashMap<Vec<u8>, Vec<String>>,
690) -> Result<(String, String, String), DriveWithError<E>> {
691    let name = e.name();
692    let (prefix_bytes, local_bytes) = split_prefix_local(name.as_ref());
693    let local = std::str::from_utf8(local_bytes)
694        .map_err(DriveWithError::Utf8)?
695        .to_string();
696    let prefix = std::str::from_utf8(prefix_bytes)
697        .map_err(DriveWithError::Utf8)?
698        .to_string();
699    let namespace = if prefix_bytes.is_empty() {
700        prefix_map
701            .get(prefix_bytes)
702            .and_then(|stack| stack.last())
703            .cloned()
704            .unwrap_or_default()
705    } else {
706        match prefix_map.get(prefix_bytes).and_then(|stack| stack.last()) {
707            Some(uri) => uri.clone(),
708            None => return Err(DriveWithError::UnboundPrefix(prefix)),
709        }
710    };
711    Ok((local, namespace, prefix))
712}
713
714fn scan_xsi_attributes<E>(
715    e: &BytesStart<'_>,
716    prefix_map: &HashMap<Vec<u8>, Vec<String>>,
717) -> Result<(Option<String>, Option<String>), DriveWithError<E>> {
718    let mut xsi_type: Option<String> = None;
719    let mut xsi_nil: Option<String> = None;
720    for attr_result in e.attributes() {
721        let attr = attr_result.map_err(|err| DriveWithError::Parse(err.into()))?;
722        let key = attr.key.as_ref();
723        if key == b"xmlns" || key.starts_with(b"xmlns:") {
724            continue;
725        }
726        let (prefix_bytes, local_bytes) = split_prefix_local(key);
727        if prefix_bytes.is_empty() {
728            continue;
729        }
730        let ns_uri = match prefix_map.get(prefix_bytes).and_then(|s| s.last()) {
731            Some(uri) => uri.as_str(),
732            None => continue,
733        };
734        if ns_uri != XSI_NAMESPACE {
735            continue;
736        }
737        let local = std::str::from_utf8(local_bytes).map_err(DriveWithError::Utf8)?;
738        let value = attr.unescape_value().map_err(DriveWithError::Parse)?;
739        match local {
740            "type" => xsi_type = Some(value.into_owned()),
741            "nil" => xsi_nil = Some(value.into_owned()),
742            _ => {}
743        }
744    }
745    Ok((xsi_type, xsi_nil))
746}
747
748fn build_ns_context(
749    prefix_map: &HashMap<Vec<u8>, Vec<String>>,
750    schema_set: &SchemaSet,
751) -> NamespaceContextSnapshot {
752    let mut snapshot = NamespaceContextSnapshot::default();
753
754    for (prefix_bytes, stack) in prefix_map {
755        let uri = match stack.last() {
756            Some(s) => s,
757            None => continue,
758        };
759        if prefix_bytes.is_empty() {
760            // Default namespace; skip the empty seed binding.
761            if uri.is_empty() {
762                continue;
763            }
764            snapshot.default_ns = Some(schema_set.name_table.add(uri));
765        } else if let Ok(prefix_str) = std::str::from_utf8(prefix_bytes) {
766            // Skip the always-in-scope xml prefix and any xmlns binding —
767            // the runtime treats them as implicit.
768            if prefix_str == "xml" || prefix_str == "xmlns" || uri.is_empty() {
769                continue;
770            }
771            let prefix_id = schema_set.name_table.add(prefix_str);
772            let uri_id = schema_set.name_table.add(uri);
773            snapshot.bindings.push((prefix_id, uri_id));
774        }
775    }
776
777    snapshot
778}
779
/// Split a raw QName at its first `:` into `(prefix, local)`.
///
/// Without a colon the prefix is empty and the whole input is the local
/// part. Only the first colon is significant; anything after it, further
/// colons included, belongs to the local part.
fn split_prefix_local(name: &[u8]) -> (&[u8], &[u8]) {
    if let Some(colon) = name.iter().position(|byte| *byte == b':') {
        let (prefix, tail) = name.split_at(colon);
        // `tail` still starts with the colon itself; skip it.
        (prefix, &tail[1..])
    } else {
        let no_prefix: &[u8] = &[];
        (no_prefix, name)
    }
}
786
/// Split the raw content of a processing instruction (the text between
/// `<?` and `?>`) into `(target, data)`.
///
/// The target is everything up to the first XML whitespace character; the
/// data is what follows once the separating whitespace has been skipped. A
/// PI without data yields an empty data string.
///
/// Only XML whitespace (space, tab, CR, LF) is treated as a separator or
/// trimmed. Other characters that Unicode classifies as whitespace, such as
/// U+00A0, are ordinary PI data and are preserved. Leading and trailing XML
/// whitespace around the whole content is stripped.
fn parse_pi_content(raw: &str) -> (&str, &str) {
    /// XML 1.0 `S` production.
    fn is_xml_whitespace(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\r' | '\n')
    }

    let trimmed = raw.trim_matches(is_xml_whitespace);
    match trimmed.split_once(is_xml_whitespace) {
        Some((target, rest)) => (target, rest.trim_start_matches(is_xml_whitespace)),
        None => (trimmed, ""),
    }
}
794
795// ── Tests ─────────────────────────────────────────────────────────────────
796
797#[cfg(test)]
798mod tests {
799    use super::*;
800    use crate::pipeline::load_and_process_schema;
801    use crate::validation::{
802        CollectingValidationSink, SchemaValidator, ValidationFlags, ValidationWarning,
803    };
804
805    fn load_schema(xsd: &str) -> SchemaSet {
806        let mut ss = SchemaSet::new();
807        load_and_process_schema(xsd.as_bytes(), "test.xsd", &mut ss, None)
808            .expect("schema parse");
809        ss
810    }
811
812    fn run(xsd: &str, instance: &str) -> (DriveOutcome, Vec<String>) {
813        let schema_set = load_schema(xsd);
814        let validator = SchemaValidator::new(&schema_set, ValidationFlags::default());
815        let mut errors = Vec::new();
816        let mut warnings: Vec<ValidationWarning> = Vec::new();
817        let sink = CollectingValidationSink {
818            errors: &mut errors,
819            warnings: &mut warnings,
820        };
821        let mut runtime = validator.start_run(sink);
822        let outcome = drive_quick_xml(instance.as_bytes(), &mut runtime, &schema_set)
823            .expect("drive failed");
824        (outcome, errors.iter().map(|e| e.to_string()).collect())
825    }
826
827    #[test]
828    fn simple_valid_root() {
829        let (outcome, errors) = run(
830            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
831                <xs:element name="root" type="xs:string"/>
832            </xs:schema>"#,
833            "<root>hello</root>",
834        );
835        assert!(errors.is_empty(), "expected no errors, got {errors:?}");
836        assert!(matches!(outcome.root_validity, Some(SchemaValidity::Valid)));
837        assert_eq!(outcome.max_depth, 1);
838    }
839
840    #[test]
841    fn empty_root_element() {
842        let (outcome, _errors) = run(
843            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
844                <xs:element name="root">
845                    <xs:complexType/>
846                </xs:element>
847            </xs:schema>"#,
848            "<root/>",
849        );
850        assert!(matches!(outcome.root_validity, Some(SchemaValidity::Valid)));
851        assert_eq!(outcome.max_depth, 1);
852    }
853
854    #[test]
855    fn unexpected_eof_open_element() {
856        let schema_set = load_schema(
857            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
858                <xs:element name="root" type="xs:string"/>
859            </xs:schema>"#,
860        );
861        let validator = SchemaValidator::new(&schema_set, ValidationFlags::default());
862        let mut errors = Vec::new();
863        let mut warnings: Vec<ValidationWarning> = Vec::new();
864        let sink = CollectingValidationSink {
865            errors: &mut errors,
866            warnings: &mut warnings,
867        };
868        let mut runtime = validator.start_run(sink);
869        // Truncated stream: open tag with no close.
870        let res = drive_quick_xml("<root>".as_bytes(), &mut runtime, &schema_set);
871        match res {
872            Err(DriveError::UnexpectedEof { depth }) => assert_eq!(depth, 1),
873            other => panic!("expected UnexpectedEof, got {other:?}"),
874        }
875    }
876
877    #[test]
878    fn unbound_attribute_prefix_errors() {
879        let schema_set = load_schema(
880            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
881                <xs:element name="root">
882                    <xs:complexType>
883                        <xs:anyAttribute processContents="skip"/>
884                    </xs:complexType>
885                </xs:element>
886            </xs:schema>"#,
887        );
888        let validator = SchemaValidator::new(&schema_set, ValidationFlags::default());
889        let mut errors = Vec::new();
890        let mut warnings: Vec<ValidationWarning> = Vec::new();
891        let sink = CollectingValidationSink {
892            errors: &mut errors,
893            warnings: &mut warnings,
894        };
895        let mut runtime = validator.start_run(sink);
896        let res = drive_quick_xml(
897            r#"<root nope:x="1"/>"#.as_bytes(),
898            &mut runtime,
899            &schema_set,
900        );
901        match res {
902            Err(DriveError::UnboundPrefix(p)) => assert_eq!(p, "nope"),
903            other => panic!("expected UnboundPrefix, got {other:?}"),
904        }
905    }
906
907    #[test]
908    fn buffer_reuse_across_calls() {
909        let schema_set = load_schema(
910            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
911                <xs:element name="root" type="xs:string"/>
912            </xs:schema>"#,
913        );
914        let validator = SchemaValidator::new(&schema_set, ValidationFlags::default());
915        let mut buf = Vec::new();
916        for _ in 0..2 {
917            let mut errors = Vec::new();
918            let mut warnings: Vec<ValidationWarning> = Vec::new();
919            let sink = CollectingValidationSink {
920                errors: &mut errors,
921                warnings: &mut warnings,
922            };
923            let mut runtime = validator.start_run(sink);
924            let outcome = drive_quick_xml_in(
925                "<root>x</root>".as_bytes(),
926                &mut runtime,
927                &schema_set,
928                &mut buf,
929            )
930            .expect("drive ok");
931            assert!(matches!(outcome.root_validity, Some(SchemaValidity::Valid)));
932            assert!(errors.is_empty());
933        }
934    }
935
936    #[test]
937    fn comment_and_pi_forwarded_to_handler() {
938        struct Capture {
939            comments: Vec<String>,
940            pis: Vec<(String, String)>,
941        }
942        impl ValidationEventHandler for Capture {
943            type Error = Infallible;
944            fn on_comment(&mut self, text: &str) -> Result<(), Self::Error> {
945                self.comments.push(text.to_string());
946                Ok(())
947            }
948            fn on_processing_instruction(
949                &mut self,
950                target: &str,
951                data: &str,
952            ) -> Result<(), Self::Error> {
953                self.pis.push((target.to_string(), data.to_string()));
954                Ok(())
955            }
956        }
957
958        let schema_set = load_schema(
959            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
960                <xs:element name="root">
961                    <xs:complexType/>
962                </xs:element>
963            </xs:schema>"#,
964        );
965        let validator = SchemaValidator::new(&schema_set, ValidationFlags::default());
966        let mut errors = Vec::new();
967        let mut warnings: Vec<ValidationWarning> = Vec::new();
968        let sink = CollectingValidationSink {
969            errors: &mut errors,
970            warnings: &mut warnings,
971        };
972        let mut runtime = validator.start_run(sink);
973
974        let mut handler = Capture {
975            comments: Vec::new(),
976            pis: Vec::new(),
977        };
978        let _ = drive_quick_xml_with(
979            "<root><!-- hi --><?pi target data?></root>".as_bytes(),
980            &mut runtime,
981            &schema_set,
982            &mut handler,
983        )
984        .expect("drive ok");
985        assert_eq!(handler.comments, vec![" hi ".to_string()]);
986        assert_eq!(handler.pis, vec![("pi".to_string(), "target data".to_string())]);
987    }
988
989    #[test]
990    fn span_offsets_bracket_each_element() {
991        struct Spans {
992            spans: Vec<(usize, usize)>,
993            stack: Vec<usize>,
994        }
995        impl ValidationEventHandler for Spans {
996            type Error = Infallible;
997            fn on_element_start_offset(&mut self, byte_pos: usize) -> Result<(), Self::Error> {
998                self.stack.push(byte_pos);
999                Ok(())
1000            }
1001            fn on_element_end_offset(&mut self, byte_pos: usize) -> Result<(), Self::Error> {
1002                let start = self.stack.pop().expect("balanced span stack");
1003                self.spans.push((start, byte_pos));
1004                Ok(())
1005            }
1006        }
1007
1008        let schema_set = load_schema(
1009            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1010                <xs:element name="root">
1011                    <xs:complexType>
1012                        <xs:sequence>
1013                            <xs:element name="b" type="xs:string"/>
1014                        </xs:sequence>
1015                    </xs:complexType>
1016                </xs:element>
1017            </xs:schema>"#,
1018        );
1019        let validator = SchemaValidator::new(&schema_set, ValidationFlags::default());
1020        let mut errors = Vec::new();
1021        let mut warnings: Vec<ValidationWarning> = Vec::new();
1022        let sink = CollectingValidationSink {
1023            errors: &mut errors,
1024            warnings: &mut warnings,
1025        };
1026        let mut runtime = validator.start_run(sink);
1027
1028        let mut handler = Spans {
1029            spans: Vec::new(),
1030            stack: Vec::new(),
1031        };
1032        let xml = "<root><b>hi</b></root>";
1033        drive_quick_xml_with(xml.as_bytes(), &mut runtime, &schema_set, &mut handler)
1034            .expect("drive ok");
1035        // Inner <b> closes before <root>; spans are captured in close order.
1036        assert_eq!(handler.spans.len(), 2);
1037        for (start, end) in &handler.spans {
1038            assert!(end > start);
1039            assert!(*end <= xml.len());
1040        }
1041    }
1042}