Skip to main content

xsd_schema/validation/
active_axis.rs

1//! Streaming matcher for identity-constraint XPath expressions.
2//!
3//! `ActiveAxis` advances through a compiled `Asttree` as SAX-style element
4//! events arrive, reporting matches without buffering the document.
5
6#![allow(dead_code)]
7
8use crate::ids::NameId;
9
10use super::asttree::{AstStep, Asttree};
11
12/// Per-depth matching state for a single path.
13#[derive(Clone)]
14struct MatchContext {
15    /// Step indices waiting to match a child element at the next depth.
16    active_steps: Vec<usize>,
17    /// Step indices that completed (element match) or are pending attribute check
18    /// at this depth. For complete matches the index points at the last `Child`
19    /// step; for attribute-pending matches it points at the `Attribute` step.
20    matched_here: Vec<usize>,
21    /// Whether an attribute tail step is pending (e.g. `foo/@id`).
22    awaiting_attribute: bool,
23    /// Whether to re-inject `first_real_step` at every depth (`.//` descendant).
24    try_from_start: bool,
25}
26
27/// State for a single path alternative within the `Asttree` union.
28struct PathState {
29    /// One context per element depth.
30    context_stack: Vec<MatchContext>,
31    /// First non-`SelfNode` step index.
32    first_real_step: usize,
33    /// Cached from [`AstPath::descendant`](super::asttree::AstPath::descendant).
34    has_descendant_prefix: bool,
35}
36
37/// Streaming matcher that advances through a compiled identity-constraint XPath
38/// as element open/close events arrive.
39pub(crate) struct ActiveAxis {
40    ast: Asttree,
41    current_depth: i32,
42    active: bool,
43    path_states: Vec<PathState>,
44    last_entered_match: bool,
45    last_exited_match: bool,
46    scope_match_flag: bool,
47}
48
49/// Advance past consecutive `SelfNode` steps, returning the index of the
50/// first non-`SelfNode` step (or `steps.len()` if all remaining are `SelfNode`).
51fn skip_self_nodes(steps: &[AstStep], mut idx: usize) -> usize {
52    while idx < steps.len() && matches!(steps[idx], AstStep::SelfNode) {
53        idx += 1;
54    }
55    idx
56}
57
58impl ActiveAxis {
59    /// Create a new matcher from a compiled `Asttree`.
60    pub(crate) fn new(ast: Asttree) -> Self {
61        let path_states = ast
62            .paths
63            .iter()
64            .map(|_| PathState {
65                context_stack: Vec::new(),
66                first_real_step: 0,
67                has_descendant_prefix: false,
68            })
69            .collect();
70
71        Self {
72            ast,
73            current_depth: -1,
74            active: false,
75            path_states,
76            last_entered_match: false,
77            last_exited_match: false,
78            scope_match_flag: false,
79        }
80    }
81
82    /// Initialize matching state for the scope element.
83    ///
84    /// Returns `true` if the expression is a bare `.` (immediate scope match).
85    pub(crate) fn activate(&mut self) -> bool {
86        self.active = true;
87        self.current_depth = 0;
88        self.scope_match_flag = false;
89        self.last_entered_match = false;
90        self.last_exited_match = false;
91
92        for (i, path) in self.ast.paths.iter().enumerate() {
93            let first_real = path
94                .steps
95                .iter()
96                .position(|s| !matches!(s, AstStep::SelfNode))
97                .unwrap_or(path.steps.len());
98
99            let state = &mut self.path_states[i];
100            state.first_real_step = first_real;
101            state.has_descendant_prefix = path.descendant;
102            state.context_stack.clear();
103
104            if first_real >= path.steps.len() {
105                // Bare "." — matches the scope element itself.
106                self.scope_match_flag = true;
107                state.context_stack.push(MatchContext {
108                    active_steps: vec![],
109                    matched_here: vec![],
110                    awaiting_attribute: false,
111                    try_from_start: false,
112                });
113            } else if matches!(path.steps[first_real], AstStep::Attribute(_)) && !path.descendant {
114                // `./@attr` — SelfNode followed by an attribute step (non-descendant).
115                // The attribute belongs to the scope element, so mark it
116                // awaiting immediately (no `move_to_start_element` needed).
117                state.context_stack.push(MatchContext {
118                    active_steps: vec![],
119                    matched_here: vec![first_real],
120                    awaiting_attribute: true,
121                    try_from_start: false,
122                });
123            } else {
124                state.context_stack.push(MatchContext {
125                    active_steps: vec![first_real],
126                    matched_here: vec![],
127                    awaiting_attribute: false,
128                    try_from_start: path.descendant,
129                });
130            }
131        }
132
133        self.scope_match_flag
134    }
135
136    /// Advance depth tracking for an element opening inside a
137    /// processContents="skip" wildcard subtree, without attempting any
138    /// selector / field matches. Pushes an empty match context so descendant
139    /// axes can't expand into the skipped content. Used by IC processing in
140    /// XSD 1.1 to honour wildcard attribution: skipped elements are outside
141    /// the schema's validation scope and must not contribute to keys/uniques
142    /// or keyrefs (see wild101..103).
143    pub(crate) fn move_to_skipped_element(&mut self) {
144        if !self.active {
145            return;
146        }
147        self.current_depth += 1;
148        self.last_entered_match = false;
149        for state in &mut self.path_states {
150            state.context_stack.push(MatchContext {
151                active_steps: Vec::new(),
152                matched_here: Vec::new(),
153                awaiting_attribute: false,
154                try_from_start: false,
155            });
156        }
157    }
158
159    /// Advance matching on an element open event.
160    ///
161    /// Returns `true` if any path completed (or attribute-pending) a match at
162    /// this element.
163    pub(crate) fn move_to_start_element(&mut self, local_name: NameId, ns: NameId) -> bool {
164        if !self.active {
165            return false;
166        }
167
168        self.current_depth += 1;
169        self.last_entered_match = false;
170
171        for (path_idx, path) in self.ast.paths.iter().enumerate() {
172            let state = &mut self.path_states[path_idx];
173
174            // Get candidate steps from parent context.
175            let parent_ctx = match state.context_stack.last() {
176                Some(ctx) => ctx,
177                None => {
178                    // Should not happen while active, but be defensive.
179                    state.context_stack.push(MatchContext {
180                        active_steps: vec![],
181                        matched_here: vec![],
182                        awaiting_attribute: false,
183                        try_from_start: state.has_descendant_prefix,
184                    });
185                    continue;
186                }
187            };
188
189            let mut candidates: Vec<usize> = parent_ctx.active_steps.clone();
190
191            // For descendant paths, also try from the first real step (dedup).
192            if parent_ctx.try_from_start && !candidates.contains(&state.first_real_step) {
193                candidates.push(state.first_real_step);
194            }
195
196            let mut new_active = Vec::new();
197            let mut new_matched = Vec::new();
198            let mut new_awaiting = false;
199
200            for &s in &candidates {
201                if s >= path.steps.len() {
202                    continue;
203                }
204                match &path.steps[s] {
205                    AstStep::SelfNode => {
206                        // SelfNode transparently matches the current node.
207                        // Advance to the next real step.
208                        let next = skip_self_nodes(&path.steps, s + 1);
209                        if next >= path.steps.len() {
210                            new_matched.push(s);
211                        } else {
212                            match &path.steps[next] {
213                                AstStep::Attribute(_) => {
214                                    new_awaiting = true;
215                                    new_matched.push(next);
216                                }
217                                _ => {
218                                    if !new_active.contains(&next) {
219                                        new_active.push(next);
220                                    }
221                                }
222                            }
223                        }
224                    }
225                    AstStep::Child(name_test) => {
226                        if name_test.matches(ns, local_name) {
227                            // Skip any trailing SelfNode steps after this match.
228                            let next = skip_self_nodes(&path.steps, s + 1);
229                            if next >= path.steps.len() {
230                                // Complete element match — no more steps.
231                                new_matched.push(s);
232                            } else {
233                                match &path.steps[next] {
234                                    AstStep::Attribute(_) => {
235                                        new_awaiting = true;
236                                        new_matched.push(next);
237                                    }
238                                    _ => {
239                                        new_active.push(next);
240                                    }
241                                }
242                            }
243                        }
244                    }
245                    AstStep::Attribute(_) => {
246                        // Attribute steps are not matched against elements.
247                    }
248                }
249            }
250
251            if !new_matched.is_empty() {
252                self.last_entered_match = true;
253            }
254
255            state.context_stack.push(MatchContext {
256                active_steps: new_active,
257                matched_here: new_matched,
258                awaiting_attribute: new_awaiting,
259                try_from_start: state.has_descendant_prefix,
260            });
261        }
262
263        self.last_entered_match
264    }
265
266    /// Pop matching context on an element close event.
267    ///
268    /// Returns `true` if the element being closed had any matches.
269    /// Deactivates when the scope element closes (depth goes below 0).
270    pub(crate) fn end_element(&mut self) -> bool {
271        if !self.active {
272            return false;
273        }
274
275        self.last_exited_match = false;
276
277        for state in &mut self.path_states {
278            if let Some(ctx) = state.context_stack.pop() {
279                // Only count element-complete matches.  Contexts with
280                // `awaiting_attribute` hold unconsumed attribute steps —
281                // those must not trigger `exited_match` because the
282                // attribute was never actually present on the element.
283                if !ctx.matched_here.is_empty() && !ctx.awaiting_attribute {
284                    self.last_exited_match = true;
285                }
286            }
287        }
288
289        self.current_depth -= 1;
290        if self.current_depth < 0 {
291            // Exiting the scope element itself.
292            if self.scope_match_flag {
293                self.last_exited_match = true;
294            }
295            self.active = false;
296        }
297
298        self.last_exited_match
299    }
300
301    /// Check whether the given attribute matches a pending attribute step.
302    ///
303    /// Call this after `move_to_start_element` returned `true` when the
304    /// compiled expression ends with an attribute axis (e.g. `foo/@id`).
305    pub(crate) fn matches_attribute(&self, local_name: NameId, ns: NameId) -> bool {
306        if !self.active {
307            return false;
308        }
309
310        for (path_idx, path) in self.ast.paths.iter().enumerate() {
311            let state = &self.path_states[path_idx];
312            if let Some(ctx) = state.context_stack.last() {
313                if ctx.awaiting_attribute {
314                    for &step_idx in &ctx.matched_here {
315                        if step_idx < path.steps.len() {
316                            if let AstStep::Attribute(name_test) = &path.steps[step_idx] {
317                                if name_test.matches(ns, local_name) {
318                                    return true;
319                                }
320                            }
321                        }
322                    }
323                }
324            }
325        }
326
327        false
328    }
329
330    /// Reset for reuse with a new scope element.
331    pub(crate) fn reactivate(&mut self) {
332        self.active = false;
333        self.current_depth = -1;
334        self.scope_match_flag = false;
335        self.last_entered_match = false;
336        self.last_exited_match = false;
337        for state in &mut self.path_states {
338            state.context_stack.clear();
339        }
340    }
341
342    /// Whether the matcher is within the constraint scope.
343    pub(crate) fn is_active(&self) -> bool {
344        self.active
345    }
346
347    /// Whether the last `move_to_start_element` produced a match.
348    pub(crate) fn entered_match(&self) -> bool {
349        self.last_entered_match
350    }
351
352    /// Whether the last `end_element` left a matched scope.
353    pub(crate) fn exited_match(&self) -> bool {
354        self.last_exited_match
355    }
356
357    /// Whether the expression is a bare `.` that matches the scope element.
358    pub(crate) fn scope_match(&self) -> bool {
359        self.scope_match_flag
360    }
361}
362
363#[cfg(test)]
364mod tests {
365    use super::*;
366    use crate::namespace::context::NamespaceContextSnapshot;
367    use crate::namespace::table::{well_known, NameTable};
368    use crate::schema::model::XsdVersion;
369    use crate::validation::asttree::Asttree;
370
371    fn compile_sel(xpath: &str, table: &NameTable) -> Asttree {
372        let snap = NamespaceContextSnapshot::default();
373        Asttree::compile_selector(xpath, &snap, table, None, None, None, XsdVersion::V1_0).unwrap()
374    }
375
376    fn compile_fld(xpath: &str, table: &NameTable) -> Asttree {
377        let snap = NamespaceContextSnapshot::default();
378        Asttree::compile_field(xpath, &snap, table, None, None, None, XsdVersion::V1_0).unwrap()
379    }
380
381    fn compile_sel_with_ns(
382        xpath: &str,
383        table: &NameTable,
384        snap: &NamespaceContextSnapshot,
385    ) -> Asttree {
386        Asttree::compile_selector(xpath, snap, table, None, None, None, XsdVersion::V1_0).unwrap()
387    }
388
389    /// `foo/bar`: enter `<foo>` (no match), enter `<bar>` (match), exit both.
390    #[test]
391    fn simple_path() {
392        let table = NameTable::new();
393        let ast = compile_sel("foo/bar", &table);
394        let mut axis = ActiveAxis::new(ast);
395
396        let foo = table.add("foo");
397        let bar = table.add("bar");
398
399        assert!(!axis.activate());
400
401        // Enter <foo> — partial match only
402        assert!(!axis.move_to_start_element(foo, well_known::EMPTY));
403        // Enter <bar> — complete match
404        assert!(axis.move_to_start_element(bar, well_known::EMPTY));
405
406        // Exit </bar> — exiting a matched element
407        assert!(axis.end_element());
408        assert!(axis.exited_match());
409        // Exit </foo> — no match at this level
410        assert!(!axis.end_element());
411        // Exit scope
412        axis.end_element();
413        assert!(!axis.is_active());
414    }
415
416    /// `.//bar`: match at depth 1 and depth 3.
417    #[test]
418    fn descendant_any_depth() {
419        let table = NameTable::new();
420        let ast = compile_sel(".//bar", &table);
421        let mut axis = ActiveAxis::new(ast);
422
423        let a = table.add("a");
424        let bar = table.add("bar");
425
426        assert!(!axis.activate());
427
428        // Match at depth 1
429        assert!(axis.move_to_start_element(bar, well_known::EMPTY));
430        axis.end_element();
431
432        // Match at depth 3: <a><a><bar/>
433        assert!(!axis.move_to_start_element(a, well_known::EMPTY));
434        assert!(!axis.move_to_start_element(a, well_known::EMPTY));
435        assert!(axis.move_to_start_element(bar, well_known::EMPTY));
436        axis.end_element(); // </bar>
437        axis.end_element(); // </a>
438        axis.end_element(); // </a>
439    }
440
441    /// `.//a/b` with `<a><a><b/>`: nested `<a>` both produce step-1 candidates.
442    #[test]
443    fn overlapping_descendant() {
444        let table = NameTable::new();
445        let ast = compile_sel(".//a/b", &table);
446        let mut axis = ActiveAxis::new(ast);
447
448        let a = table.add("a");
449        let b = table.add("b");
450
451        assert!(!axis.activate());
452
453        // <a> — partial match
454        assert!(!axis.move_to_start_element(a, well_known::EMPTY));
455        // <a> (nested) — starts a new a/b candidate via descendant
456        assert!(!axis.move_to_start_element(a, well_known::EMPTY));
457        // <b> — matches the inner <a><b> path
458        assert!(axis.move_to_start_element(b, well_known::EMPTY));
459
460        axis.end_element(); // </b>
461        axis.end_element(); // </a>
462        axis.end_element(); // </a>
463    }
464
465    /// `a|b|c`: each alternative matches independently.
466    #[test]
467    fn union_paths() {
468        let table = NameTable::new();
469        let ast = compile_sel("a|b|c", &table);
470        let mut axis = ActiveAxis::new(ast);
471
472        let a = table.add("a");
473        let b = table.add("b");
474        let c = table.add("c");
475        let x = table.add("x");
476
477        assert!(!axis.activate());
478
479        assert!(axis.move_to_start_element(a, well_known::EMPTY));
480        axis.end_element();
481
482        assert!(axis.move_to_start_element(b, well_known::EMPTY));
483        axis.end_element();
484
485        assert!(axis.move_to_start_element(c, well_known::EMPTY));
486        axis.end_element();
487
488        assert!(!axis.move_to_start_element(x, well_known::EMPTY));
489        axis.end_element();
490    }
491
492    /// `foo/@id`: element NOT final, `matches_attribute("id")` returns true.
493    #[test]
494    fn attribute_field() {
495        let table = NameTable::new();
496        let ast = compile_fld("foo/@id", &table);
497        let mut axis = ActiveAxis::new(ast);
498
499        let foo = table.add("foo");
500        let id = table.add("id");
501        let other = table.add("other");
502
503        assert!(!axis.activate());
504
505        // Enter <foo> — has pending attribute
506        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
507
508        // Attribute @id matches
509        assert!(axis.matches_attribute(id, well_known::EMPTY));
510        // Attribute @other does not
511        assert!(!axis.matches_attribute(other, well_known::EMPTY));
512
513        axis.end_element();
514    }
515
516    /// `foo/@*`: any attribute matches after `<foo>`.
517    #[test]
518    fn attribute_wildcard() {
519        let table = NameTable::new();
520        let ast = compile_fld("foo/@*", &table);
521        let mut axis = ActiveAxis::new(ast);
522
523        let foo = table.add("foo");
524        let anything = table.add("anything");
525        let other = table.add("other");
526
527        assert!(!axis.activate());
528
529        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
530
531        // Any attribute matches @*
532        assert!(axis.matches_attribute(anything, well_known::EMPTY));
533        assert!(axis.matches_attribute(other, well_known::EMPTY));
534
535        axis.end_element();
536    }
537
538    /// `.`: `activate()` returns true, `scope_match()` is true.
539    #[test]
540    fn self_selector() {
541        let table = NameTable::new();
542        let ast = compile_sel(".", &table);
543        let mut axis = ActiveAxis::new(ast);
544
545        assert!(axis.activate());
546        assert!(axis.scope_match());
547    }
548
549    /// `./foo`: equivalent to `foo`, matches child.
550    #[test]
551    fn self_then_child() {
552        let table = NameTable::new();
553        let ast = compile_sel("./foo", &table);
554        let mut axis = ActiveAxis::new(ast);
555
556        let foo = table.add("foo");
557        let bar = table.add("bar");
558
559        assert!(!axis.activate());
560
561        // ./foo is equivalent to foo
562        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
563        axis.end_element();
564
565        assert!(!axis.move_to_start_element(bar, well_known::EMPTY));
566        axis.end_element();
567    }
568
569    /// Prefixed name: match only with correct namespace.
570    #[test]
571    fn namespace_match() {
572        let table = NameTable::new();
573        let ns = table.add("http://example.com");
574        let prefix = table.add("p");
575        let snap = NamespaceContextSnapshot {
576            default_ns: None,
577            bindings: vec![(prefix, ns)],
578        };
579        let ast = compile_sel_with_ns("p:foo", &table, &snap);
580        let mut axis = ActiveAxis::new(ast);
581
582        let foo = table.add("foo");
583        let other_ns = table.add("http://other.com");
584
585        assert!(!axis.activate());
586
587        // Correct namespace — match
588        assert!(axis.move_to_start_element(foo, ns));
589        axis.end_element();
590
591        // Wrong namespace — no match
592        assert!(!axis.move_to_start_element(foo, other_ns));
593        axis.end_element();
594
595        // No namespace — no match
596        assert!(!axis.move_to_start_element(foo, well_known::EMPTY));
597        axis.end_element();
598    }
599
600    /// `*`: any element matches.
601    #[test]
602    fn wildcard_match() {
603        let table = NameTable::new();
604        let ast = compile_sel("*", &table);
605        let mut axis = ActiveAxis::new(ast);
606
607        let any_name = table.add("anything");
608        let ns = table.add("http://example.com");
609
610        assert!(!axis.activate());
611
612        assert!(axis.move_to_start_element(any_name, well_known::EMPTY));
613        axis.end_element();
614
615        assert!(axis.move_to_start_element(any_name, ns));
616        axis.end_element();
617    }
618
619    /// After exiting scope element, `is_active()` is false.
620    #[test]
621    fn scope_deactivation() {
622        let table = NameTable::new();
623        let ast = compile_sel("foo", &table);
624        let mut axis = ActiveAxis::new(ast);
625
626        let foo = table.add("foo");
627
628        assert!(!axis.activate());
629        assert!(axis.is_active());
630
631        // Enter and exit a child
632        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
633        axis.end_element();
634
635        // Exit the scope element
636        axis.end_element();
637        assert!(!axis.is_active());
638    }
639
640    /// `a/b`: enter `<x><y><z>` — no matches.
641    #[test]
642    fn no_match_deep() {
643        let table = NameTable::new();
644        let ast = compile_sel("a/b", &table);
645        let mut axis = ActiveAxis::new(ast);
646
647        let x = table.add("x");
648        let y = table.add("y");
649        let z = table.add("z");
650
651        assert!(!axis.activate());
652
653        assert!(!axis.move_to_start_element(x, well_known::EMPTY));
654        assert!(!axis.move_to_start_element(y, well_known::EMPTY));
655        assert!(!axis.move_to_start_element(z, well_known::EMPTY));
656
657        axis.end_element();
658        axis.end_element();
659        axis.end_element();
660    }
661
662    /// After deactivation, `reactivate()` + `activate()` gives fresh state.
663    #[test]
664    fn reactivate_reuse() {
665        let table = NameTable::new();
666        let ast = compile_sel("foo", &table);
667        let mut axis = ActiveAxis::new(ast);
668
669        let foo = table.add("foo");
670
671        // First use
672        assert!(!axis.activate());
673        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
674        axis.end_element();
675        axis.end_element(); // exit scope
676        assert!(!axis.is_active());
677
678        // Reactivate for a new scope
679        axis.reactivate();
680        assert!(!axis.is_active());
681
682        assert!(!axis.activate());
683        assert!(axis.is_active());
684
685        // Should match again
686        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
687        axis.end_element();
688    }
689
690    /// `foo/./bar`: mid-path SelfNode is transparent, equivalent to `foo/bar`.
691    #[test]
692    fn mid_path_self_node() {
693        let table = NameTable::new();
694        let ast = compile_sel("foo/./bar", &table);
695        let mut axis = ActiveAxis::new(ast);
696
697        let foo = table.add("foo");
698        let bar = table.add("bar");
699
700        assert!(!axis.activate());
701
702        // Enter <foo> — partial match
703        assert!(!axis.move_to_start_element(foo, well_known::EMPTY));
704        // Enter <bar> — complete match (SelfNode skipped)
705        assert!(axis.move_to_start_element(bar, well_known::EMPTY));
706
707        axis.end_element(); // </bar>
708        axis.end_element(); // </foo>
709    }
710
711    /// `foo/.`: trailing SelfNode, equivalent to `foo`.
712    #[test]
713    fn trailing_self_node() {
714        let table = NameTable::new();
715        let ast = compile_sel("foo/.", &table);
716        let mut axis = ActiveAxis::new(ast);
717
718        let foo = table.add("foo");
719
720        assert!(!axis.activate());
721
722        // Enter <foo> — complete match (trailing SelfNode consumed)
723        assert!(axis.move_to_start_element(foo, well_known::EMPTY));
724
725        axis.end_element();
726    }
727
728    /// `.`: exiting the scope element signals `exited_match`.
729    #[test]
730    fn self_selector_exit() {
731        let table = NameTable::new();
732        let ast = compile_sel(".", &table);
733        let mut axis = ActiveAxis::new(ast);
734
735        assert!(axis.activate());
736        assert!(axis.scope_match());
737
738        // Exit the scope element — should signal exit match
739        assert!(axis.end_element());
740        assert!(axis.exited_match());
741        assert!(!axis.is_active());
742    }
743
744    /// `.//a`: matches `<a>` at multiple depths.
745    #[test]
746    fn descendant_single() {
747        let table = NameTable::new();
748        let ast = compile_sel(".//a", &table);
749        let mut axis = ActiveAxis::new(ast);
750
751        let a = table.add("a");
752        let x = table.add("x");
753
754        assert!(!axis.activate());
755
756        // <a> at depth 1
757        assert!(axis.move_to_start_element(a, well_known::EMPTY));
758        // <a> at depth 2 (nested under first <a>)
759        assert!(axis.move_to_start_element(a, well_known::EMPTY));
760        axis.end_element(); // </a> inner
761                            // <x> at depth 2 — no match
762        assert!(!axis.move_to_start_element(x, well_known::EMPTY));
763        axis.end_element(); // </x>
764        axis.end_element(); // </a> outer
765    }
766}