simdxml 0.2.1

SIMD-accelerated XML parser with full XPath 1.0 support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
//! Flat-array structural index for XML documents.
//!
//! Instead of building a DOM tree with heap-allocated nodes and pointer-chasing,
//! this module represents XML structure as parallel arrays (struct-of-arrays layout).
//! Each tag gets an entry in `tag_starts`, `tag_ends`, `tag_types`, `tag_names`,
//! `depths`, and `parents` — all indexed by the same tag position.
//!
//! Memory overhead is ~16 bytes per tag vs ~35 bytes per node in a typical DOM.
//!
//! Parent-child and sibling relationships are stored in CSR (Compressed Sparse Row)
//! format, built lazily on first XPath evaluation. Tag names are interned to `u16`
//! IDs with an inverted posting index for O(1) name-to-tags lookup.
//!
//! The index supports all 13 XPath 1.0 axes via array operations:
//! - O(1) ancestor/descendant checks using pre/post-order numbering
//! - O(1) child enumeration via CSR offsets
//! - O(1) tag name matching via interned name IDs

#[doc(hidden)]
pub mod lazy;
#[doc(hidden)]
pub mod structural;
pub(crate) mod tags;

/// The structural index — flat arrays, no DOM tree.
///
/// Built from XML bytes in one pass (scalar or SIMD). Enables random-access
/// evaluation of all 13 XPath 1.0 axes via array operations instead of
/// pointer-chasing through a DOM.
///
/// Memory: ~16 bytes per tag vs ~35 bytes per node in a typical DOM.
///
/// # Examples
///
/// ```rust
/// let xml = b"<root><item>hello</item><item>world</item></root>";
/// let mut index = simdxml::parse(xml).unwrap();
///
/// // Lazy index build happens automatically on first query
/// let texts = index.xpath_text("//item").unwrap();
/// assert_eq!(texts, vec!["hello", "world"]);
///
/// // Or build indices explicitly for repeated queries
/// index.ensure_indices();
/// index.build_name_index();
/// ```
pub struct XmlIndex<'a> {
    /// Original XML bytes (borrowed, not copied)
    pub(crate) input: &'a [u8],

    /// Byte offset of each '<' (start of each tag/comment/PI)
    pub tag_starts: Vec<u64>,

    /// Byte offset of each '>' (end of each tag/comment/PI)
    pub tag_ends: Vec<u64>,

    /// Tag type classification
    pub(crate) tag_types: Vec<TagType>,

    /// Tag name: (byte offset, length) into input
    pub(crate) tag_names: Vec<(u64, u16)>,

    /// Nesting depth of each tag (0 = root level)
    pub(crate) depths: Vec<u16>,

    /// Index of parent tag (into tag_starts array). Root tags have parent = u32::MAX.
    pub parents: Vec<u32>,

    /// Text content ranges: (start_offset, end_offset) for text between tags
    pub text_ranges: Vec<TextRange>,

    // === Precomputed indices (built by `build_indices()`) ===

    /// CSR children: offsets[i]..offsets[i+1] into child_data gives children of tag i.
    pub(crate) child_offsets: Vec<u32>,
    /// Flat array of child tag indices, referenced by child_offsets.
    pub(crate) child_data: Vec<u32>,

    /// CSR text children: text_offsets[i]..text_offsets[i+1] into text_data.
    pub(crate) text_child_offsets: Vec<u32>,
    /// Flat array of text range indices, referenced by text_child_offsets.
    pub(crate) text_child_data: Vec<u32>,

    /// Matching close tag for each open tag. u32::MAX = no match.
    pub(crate) close_map: Vec<u32>,

    /// Post-order number for each tag. Enables O(1) ancestor/descendant checks:
    /// A is ancestor of B iff pre(A) < pre(B) AND post(A) > post(B).
    /// (Pre-order number is just the tag index itself.)
    pub(crate) post_order: Vec<u32>,

    // === Tag name interning ===

    /// Interned name ID per tag. Same name → same ID. u16::MAX = no name.
    pub name_ids: Vec<u16>,
    /// Unique name strings: name_id → (byte_offset, length) in input.
    pub name_table: Vec<(u64, u16)>,
    /// Inverted index: name_id → sorted list of tag indices (Open/SelfClose only).
    pub name_posting: Vec<Vec<u32>>,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum TagType {
    /// An opening tag: `<tag>` or `<tag attr="val">`
    Open = 0,
    /// A closing tag: `</tag>`
    Close = 1,
    /// A self-closing tag: `<tag/>` or `<tag attr="val"/>`
    SelfClose = 2,
    /// An XML comment: `<!-- ... -->`
    Comment = 3,
    /// A CDATA section: `<![CDATA[ ... ]]>`
    CData = 4,
    /// A processing instruction: `<?target ... ?>`
    PI = 5,
}

impl TagType {
    /// Convert from u8 (for deserialization). Returns None for invalid values.
    pub fn from_u8(v: u8) -> Option<Self> {
        match v {
            0 => Some(TagType::Open),
            1 => Some(TagType::Close),
            2 => Some(TagType::SelfClose),
            3 => Some(TagType::Comment),
            4 => Some(TagType::CData),
            5 => Some(TagType::PI),
            _ => None,
        }
    }
}

/// Interns tag name byte slices to u16 IDs during parsing.
/// Uses linear search over a small table (typically 20-200 unique names).
/// Zero heap allocation per intern call — just byte comparison.
pub(crate) struct NameInterner<'a> {
    input: &'a [u8],
    table: Vec<(u64, u16)>,
    map: Option<std::collections::HashMap<u64, u16>>,
}

impl<'a> NameInterner<'a> {
    pub fn new(input: &'a [u8]) -> Self {
        Self { input, table: Vec::with_capacity(64), map: None }
    }

    #[inline]
    fn fnv1a(bytes: &[u8]) -> u64 {
        let mut h: u64 = 0xcbf29ce484222325;
        for &b in bytes { h ^= b as u64; h = h.wrapping_mul(0x100000001b3); }
        h
    }

    /// Intern a name, returning its ID.
    /// Linear scan for <256 unique names, hash map above that.
    /// On hash collision, falls back to linear scan for correctness.
    #[inline]
    pub fn intern(&mut self, name_bytes: &[u8], offset: u64, len: u16) -> u16 {
        if let Some(ref map) = self.map {
            let hash = Self::fnv1a(name_bytes);
            if let Some(&id) = map.get(&hash) {
                let (off, l) = self.table[id as usize];
                if l == len && &self.input[off as usize..off as usize + l as usize] == name_bytes {
                    return id;
                }
                // Hash collision: bytes don't match. Fall back to linear scan.
                for (i, &(off2, l2)) in self.table.iter().enumerate() {
                    if l2 == len && &self.input[off2 as usize..off2 as usize + l2 as usize] == name_bytes {
                        return i as u16;
                    }
                }
                // Not found anywhere — insert new entry (don't overwrite map slot)
                let new_id = self.table.len().min(u16::MAX as usize) as u16;
                self.table.push((offset, len));
                return new_id;
            }
            // No map entry for this hash — insert
            let id = self.table.len().min(u16::MAX as usize) as u16;
            self.table.push((offset, len));
            self.map.as_mut().unwrap().insert(hash, id);
            return id;
        }

        // Linear scan for small tables
        for (id, &(off, l)) in self.table.iter().enumerate() {
            if l == len && &self.input[off as usize..off as usize + l as usize] == name_bytes {
                return id as u16;
            }
        }
        let id = self.table.len() as u16;
        self.table.push((offset, len));

        // Switch to hash map at threshold
        if self.table.len() == 256 {
            let mut map = std::collections::HashMap::with_capacity(512);
            for (i, &(off, l)) in self.table.iter().enumerate() {
                let bytes = &self.input[off as usize..off as usize + l as usize];
                map.insert(Self::fnv1a(bytes), i as u16);
            }
            self.map = Some(map);
        }
        id
    }

    pub fn into_table(self) -> Vec<(u64, u16)> {
        self.table
    }
}

/// A text content range between tags.
///
/// Represents a contiguous run of text bytes in the original XML input.
/// The `parent_tag` is the index of the innermost enclosing open tag.
/// For text between sibling elements (e.g., `<a>text<b/>more</a>`),
/// `parent_tag` points to the common parent (`<a>` in this example).
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct TextRange {
    /// Byte offset of text start in the original XML input.
    pub start: u64,
    /// Byte offset of text end (exclusive) in the original XML input.
    pub end: u64,
    /// Index of the parent open tag in the [`XmlIndex`] arrays.
    /// Set to `u32::MAX` for text at the document root level.
    pub parent_tag: u32,
}

impl<'a> XmlIndex<'a> {
    /// Ensure precomputed indices are built. No-op if already built.
    pub fn ensure_indices(&mut self) {
        if !self.has_indices() && self.tag_count() >= 1 {
            self.build_indices();
        }
    }

    /// Build precomputed indices for fast XPath evaluation.
    /// Called once after structural parsing. O(n) time, flat memory layout.
    ///
    /// Uses threaded CSR builds for large documents (>10K tags) where the
    /// parallelism benefit exceeds thread spawn overhead (~25µs). Small/medium
    /// documents use sequential builds to avoid the overhead penalty.
    pub(crate) fn build_indices(&mut self) {
        let n = self.tag_count();
        let need_close_map = self.close_map.is_empty();

        // Threshold: thread spawn costs ~25µs. Below 10K tags, sequential is faster.
        if n < 10_000 {
            let (co, cd) = build_csr_children(&self.tag_types, &self.parents, n);
            let (tco, tcd) = build_csr_text_children(&self.text_ranges, n);
            self.child_offsets = co;
            self.child_data = cd;
            self.text_child_offsets = tco;
            self.text_child_data = tcd;
            if need_close_map {
                let (cm, po) = build_close_map_and_post_order(&self.tag_types, n);
                self.close_map = cm;
                self.post_order = po;
            }
            return;
        }

        // Large documents: run CSR builds concurrently
        let tag_types = &self.tag_types;
        let parents = &self.parents;
        let text_ranges = &self.text_ranges;

        let (child_offsets, child_data, text_child_offsets, text_child_data,
             close_map, post_order) = std::thread::scope(|scope| {
            let csr_children = scope.spawn(move || {
                build_csr_children(tag_types, parents, n)
            });
            let csr_text = scope.spawn(move || {
                build_csr_text_children(text_ranges, n)
            });
            let (cm, po) = if need_close_map {
                build_close_map_and_post_order(tag_types, n)
            } else {
                (Vec::new(), Vec::new())
            };
            let (co, cd) = csr_children.join().unwrap();
            let (tco, tcd) = csr_text.join().unwrap();
            (co, cd, tco, tcd, cm, po)
        });

        self.child_offsets = child_offsets;
        self.child_data = child_data;
        self.text_child_offsets = text_child_offsets;
        self.text_child_data = text_child_data;
        if need_close_map {
            self.close_map = close_map;
            self.post_order = post_order;
        }
    }

    /// Get child tag indices for a parent (from precomputed CSR index).
    #[inline]
    pub(crate) fn child_tag_slice(&self, parent_idx: usize) -> &[u32] {
        if self.child_offsets.len() < 2 || parent_idx + 1 >= self.child_offsets.len() {
            return &[];
        }
        let start = self.child_offsets[parent_idx] as usize;
        let end = self.child_offsets[parent_idx + 1] as usize;
        &self.child_data[start..end]
    }

    /// Get child text range indices for a parent (from precomputed CSR index).
    #[inline]
    pub fn child_text_slice(&self, parent_idx: usize) -> &[u32] {
        if self.text_child_offsets.len() < 2 || parent_idx + 1 >= self.text_child_offsets.len() {
            return &[];
        }
        let start = self.text_child_offsets[parent_idx] as usize;
        let end = self.text_child_offsets[parent_idx + 1] as usize;
        &self.text_child_data[start..end]
    }

    /// Whether precomputed CSR indices are available.
    #[inline]
    pub(crate) fn has_indices(&self) -> bool {
        !self.child_offsets.is_empty()
    }

    /// O(1) ancestor check using pre/post numbering.
    /// Returns true if `ancestor_idx` is an ancestor of `descendant_idx`.
    #[inline]
    pub(crate) fn is_ancestor(&self, ancestor_idx: usize, descendant_idx: usize) -> bool {
        if self.post_order.is_empty() { return false; }
        // pre(A) < pre(B) AND post(A) > post(B)
        ancestor_idx < descendant_idx
            && self.post_order[ancestor_idx] > self.post_order[descendant_idx]
    }

    /// Build inverted name index for repeated query workloads.
    /// Call this once before evaluating many XPath expressions on the same document.
    pub fn build_name_index(&mut self) {
        if !self.name_posting.is_empty() { return; }
        let n = self.tag_count();
        let mut interner = NameInterner::new(self.input);
        self.name_ids = Vec::with_capacity(n);
        for i in 0..n {
            let (off, len) = self.tag_names[i];
            if len > 0 {
                let name_bytes = &self.input[off as usize..off as usize + len as usize];
                self.name_ids.push(interner.intern(name_bytes, off, len));
            } else {
                self.name_ids.push(u16::MAX);
            }
        }
        self.name_table = interner.into_table();
        let num_names = self.name_table.len();
        let mut posting: Vec<Vec<u32>> = vec![Vec::new(); num_names];
        for i in 0..n {
            let nid = self.name_ids[i];
            if nid != u16::MAX && (nid as usize) < num_names {
                let tt = self.tag_types[i];
                if tt == TagType::Open || tt == TagType::SelfClose {
                    posting[nid as usize].push(i as u32);
                }
            }
        }
        self.name_posting = posting;
    }

    /// Look up the interned name ID for a name string. Returns None if not found.
    #[inline]
    pub fn name_id(&self, name: &str) -> Option<u16> {
        let name_bytes = name.as_bytes();
        for (id, &(off, len)) in self.name_table.iter().enumerate() {
            if len as usize == name_bytes.len()
                && &self.input[off as usize..off as usize + len as usize] == name_bytes
            {
                return Some(id as u16);
            }
        }
        None
    }

    /// Get the posting list (sorted tag indices) for a name. O(1) lookup.
    #[inline]
    pub fn tags_by_name(&self, name: &str) -> &[u32] {
        if let Some(id) = self.name_id(name) {
            if (id as usize) < self.name_posting.len() {
                return &self.name_posting[id as usize];
            }
        }
        &[]
    }

    /// Fast tag name comparison (avoids UTF-8 validation on the hot path).
    #[inline(always)]
    pub fn tag_name_eq(&self, tag_idx: usize, name: &str) -> bool {
        if tag_idx >= self.tag_names.len() { return false; }
        let (off, len) = self.tag_names[tag_idx];
        let name_bytes = name.as_bytes();
        if name_bytes.len() != len as usize { return false; }
        &self.input[off as usize..off as usize + len as usize] == name_bytes
    }

    /// Get the tag name as a string slice.
    #[inline]
    pub fn tag_name(&self, tag_idx: usize) -> &'a str {
        if tag_idx >= self.tag_names.len() {
            return "";
        }
        let (offset, len) = self.tag_names[tag_idx];
        let bytes = &self.input[offset as usize..(offset + len as u64) as usize];
        // Safety: XML input is validated during parsing; tag names are always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }

    /// Get the text content of a text range by its index.
    #[inline]
    pub fn text_by_index(&self, text_idx: usize) -> &'a str {
        self.text_content(&self.text_ranges[text_idx])
    }

    /// Get the text content of a text range.
    #[inline]
    pub fn text_content(&self, range: &TextRange) -> &'a str {
        let bytes = &self.input[range.start as usize..range.end as usize];
        // Safety: text content comes from valid XML input.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }

    /// Decode XML entities in a string. Returns borrowed if no entities present.
    pub fn decode_entities(s: &str) -> std::borrow::Cow<'_, str> {
        if !s.contains('&') {
            return std::borrow::Cow::Borrowed(s);
        }
        let mut result = String::with_capacity(s.len());
        let mut chars = s.chars().peekable();
        while let Some(c) = chars.next() {
            if c == '&' {
                let mut entity = String::new();
                for ec in chars.by_ref() {
                    if ec == ';' { break; }
                    entity.push(ec);
                }
                match entity.as_str() {
                    "amp" => result.push('&'),
                    "lt" => result.push('<'),
                    "gt" => result.push('>'),
                    "apos" => result.push('\''),
                    "quot" => result.push('"'),
                    e if e.starts_with('#') => {
                        let num = &e[1..];
                        let code = if let Some(hex) = num.strip_prefix('x') {
                            u32::from_str_radix(hex, 16).ok()
                        } else {
                            num.parse::<u32>().ok()
                        };
                        if let Some(ch) = code.and_then(char::from_u32) {
                            result.push(ch);
                        }
                    }
                    _ => { result.push('&'); result.push_str(&entity); result.push(';'); }
                }
            } else {
                result.push(c);
            }
        }
        std::borrow::Cow::Owned(result)
    }

    /// Number of tags in the index.
    pub fn tag_count(&self) -> usize {
        self.tag_starts.len()
    }

    /// Number of text content ranges.
    pub fn text_count(&self) -> usize {
        self.text_ranges.len()
    }

    /// Get the tag type at the given index.
    #[inline]
    pub fn tag_type(&self, idx: usize) -> TagType {
        self.tag_types[idx]
    }

    /// Get the nesting depth of a tag (0 = root level).
    #[inline]
    pub fn depth(&self, idx: usize) -> u16 {
        self.depths[idx]
    }

    /// Maximum nesting depth in the document.
    pub fn max_depth(&self) -> u16 {
        self.depths.iter().copied().max().unwrap_or(0)
    }

    /// Find the index of the close tag matching an open tag.
    pub fn matching_close(&self, open_idx: usize) -> Option<usize> {
        if open_idx >= self.tag_count() {
            return None;
        }
        // Use precomputed close_map if available
        if !self.close_map.is_empty() {
            let close = self.close_map[open_idx];
            return if close != u32::MAX { Some(close as usize) } else { None };
        }
        // Fallback: linear scan (used before build_indices)
        if self.tag_types[open_idx] == TagType::SelfClose {
            return Some(open_idx);
        }
        if self.tag_types[open_idx] != TagType::Open {
            return None;
        }
        let depth = self.depths[open_idx];
        let name = self.tag_name(open_idx);
        for i in (open_idx + 1)..self.tag_count() {
            if self.tag_types[i] == TagType::Close
                && self.depths[i] == depth
                && self.tag_name(i) == name
            {
                return Some(i);
            }
        }
        None
    }

    /// Get children (direct child open/self-close tags) of a tag.
    pub fn children(&self, parent_idx: usize) -> Vec<usize> {
        if self.has_indices() {
            self.child_tag_slice(parent_idx).iter().map(|&i| i as usize).collect()
        } else {
            (0..self.tag_count())
                .filter(|&i| self.parents[i] == parent_idx as u32
                    && (self.tag_types[i] == TagType::Open || self.tag_types[i] == TagType::SelfClose))
                .collect()
        }
    }

    /// Get text content directly under a tag (not nested).
    pub fn direct_text(&self, tag_idx: usize) -> Vec<&'a str> {
        if self.has_indices() {
            self.child_text_slice(tag_idx).iter()
                .map(|&ti| self.text_content(&self.text_ranges[ti as usize]))
                .collect()
        } else {
            self.text_ranges.iter()
                .filter(|r| r.parent_tag == tag_idx as u32)
                .map(|r| self.text_content(r))
                .collect()
        }
    }

    /// Get the raw XML bytes for an element (from opening tag through closing tag).
    /// For self-closing tags, returns just the tag. For text nodes, returns the text.
    pub fn raw_xml(&self, tag_idx: usize) -> &'a str {
        let start = self.tag_starts[tag_idx] as usize;
        if self.tag_types[tag_idx] == TagType::SelfClose {
            let end = self.tag_ends[tag_idx] as usize + 1;
            return unsafe { std::str::from_utf8_unchecked(&self.input[start..end]) };
        }
        if let Some(close_idx) = self.matching_close(tag_idx) {
            let end = self.tag_ends[close_idx] as usize + 1;
            return unsafe { std::str::from_utf8_unchecked(&self.input[start..end]) };
        }
        // Fallback: just the opening tag
        let end = self.tag_ends[tag_idx] as usize + 1;
        unsafe { std::str::from_utf8_unchecked(&self.input[start..end]) }
    }

    /// Get the raw XML bytes for the opening tag at a given index.
    pub fn raw_tag(&self, tag_idx: usize) -> &'a str {
        let start = self.tag_starts[tag_idx] as usize;
        let end = self.tag_ends[tag_idx] as usize + 1;
        unsafe { std::str::from_utf8_unchecked(&self.input[start..end]) }
    }

    /// Get all text content under a tag (including nested).
    /// Uses precomputed text ranges instead of byte-by-byte tag stripping.
    pub fn all_text(&self, tag_idx: usize) -> String {
        let close_idx = self.matching_close(tag_idx).unwrap_or(tag_idx);
        let tag_start = self.tag_starts[tag_idx];
        let tag_end = if close_idx == tag_idx {
            self.tag_ends[tag_idx]
        } else {
            self.tag_starts[close_idx]
        };

        let mut result = String::new();

        // Binary search for the first text range at or after tag_start.
        // Text ranges are ordered by start offset, so we can skip ahead.
        let start_idx = self.text_ranges
            .partition_point(|r| r.start < tag_start);

        for range in &self.text_ranges[start_idx..] {
            if range.start > tag_end { break; }
            if range.end <= tag_end {
                result.push_str(self.text_content(range));
            }
        }
        result
    }

    // === Element navigation and zero-allocation child access ===

    /// Get the parent tag index of an element.
    /// Returns `None` for root-level elements or out-of-range indices.
    pub fn parent(&self, tag_idx: usize) -> Option<usize> {
        if tag_idx >= self.parents.len() {
            return None;
        }
        let p = self.parents[tag_idx];
        if p == u32::MAX { None } else { Some(p as usize) }
    }

    /// Get the position of a tag within its parent's children list.
    /// Returns `None` for root-level elements.
    pub fn child_position(&self, tag_idx: usize) -> Option<usize> {
        let parent = self.parent(tag_idx)?;
        if self.has_indices() {
            let slice = self.child_tag_slice(parent);
            slice.iter().position(|&c| c as usize == tag_idx)
        } else {
            let children = self.children(parent);
            children.iter().position(|&c| c == tag_idx)
        }
    }

    /// Direct child tag indices as a borrowed slice (zero allocation).
    ///
    /// Requires `ensure_indices()` to have been called. Returns an empty
    /// slice if indices haven't been built.
    pub fn child_slice(&self, parent_idx: usize) -> &[u32] {
        self.child_tag_slice(parent_idx)
    }

    /// Number of direct child elements.
    ///
    /// Zero allocation when indices are built; falls back to `children()`
    /// (which allocates) if not.
    pub fn child_count(&self, parent_idx: usize) -> usize {
        if self.has_indices() {
            self.child_tag_slice(parent_idx).len()
        } else {
            self.children(parent_idx).len()
        }
    }

    /// Get the i-th child element by position.
    ///
    /// Zero allocation when indices are built; falls back to `children()`
    /// (which allocates) if not.
    pub fn child_at(&self, parent_idx: usize, pos: usize) -> Option<usize> {
        if self.has_indices() {
            let slice = self.child_tag_slice(parent_idx);
            slice.get(pos).map(|&c| c as usize)
        } else {
            let children = self.children(parent_idx);
            children.get(pos).copied()
        }
    }

    /// Get the first direct text child of an element without allocating.
    /// Returns the text before the first child element (ElementTree `.text` semantics).
    pub fn direct_text_first(&self, tag_idx: usize) -> Option<&'a str> {
        if self.has_indices() {
            let text_children = self.child_text_slice(tag_idx);
            if let Some(&first_idx) = text_children.first() {
                let range = &self.text_ranges[first_idx as usize];
                let text = self.text_content(range);
                if !text.is_empty() {
                    return Some(text);
                }
            }
            None
        } else {
            // Fallback: linear scan
            for range in &self.text_ranges {
                if range.parent_tag == tag_idx as u32 {
                    let text = self.text_content(range);
                    if !text.is_empty() {
                        return Some(text);
                    }
                }
            }
            None
        }
    }

    /// Get the tail text of an element — text after its closing tag, before the
    /// next sibling opens. This is the ElementTree `.tail` concept.
    ///
    /// Returns `None` for root elements or elements with no tail text.
    pub fn tail_text(&self, tag_idx: usize) -> Option<&'a str> {
        let parent = self.parent(tag_idx)?;

        // Find the end of this element (close tag or self-close)
        let close_idx = self.matching_close(tag_idx).unwrap_or(tag_idx);
        let close_end = self.tag_ends[close_idx] + 1;

        // Binary search for text range starting at close_end.
        // text_ranges is sorted by start offset.
        let start_idx = self
            .text_ranges
            .partition_point(|r| r.start < close_end);

        for range in &self.text_ranges[start_idx..] {
            if range.start > close_end {
                break; // Past the target offset
            }
            if range.start == close_end && range.parent_tag == parent as u32 {
                let text = self.text_content(range);
                if !text.is_empty() {
                    return Some(text);
                }
            }
        }
        None
    }

    /// Collect all text within an element in stdlib itertext() order.
    ///
    /// Returns text segments in document order: .text, then for each child
    /// recursively their itertext, then the child's .tail. This is a single
    /// Rust call replacing per-element Python callbacks.
    pub fn itertext_collect(&self, tag_idx: usize) -> Vec<&'a str> {
        let start_offset = self.tag_starts[tag_idx];
        let close_idx = self.matching_close(tag_idx).unwrap_or(tag_idx);
        let end_offset = self.tag_ends[close_idx] + 1;

        let first = self.text_ranges.partition_point(|r| r.start < start_offset);

        let mut result = Vec::new();
        for range in &self.text_ranges[first..] {
            if range.start >= end_offset {
                break;
            }
            let text = self.text_content(range);
            if !text.is_empty() {
                result.push(text);
            }
        }
        result
    }

    /// Serialize an element to C14N canonical XML.
    ///
    /// Attributes sorted lexicographically, empty elements expanded to
    /// start/end pairs. Single Rust call, no per-element FFI.
    pub fn canonicalize(&self, tag_idx: usize) -> String {
        let mut out = String::new();
        self.c14n_element(tag_idx, &mut out);
        out
    }

    fn c14n_element(&self, tag_idx: usize, out: &mut String) {
        let tag = self.tag_name(tag_idx);
        out.push('<');
        out.push_str(tag);

        // Sorted attributes
        let mut attrs = self.get_all_attribute_names(tag_idx)
            .into_iter()
            .filter_map(|name| {
                self.get_attribute(tag_idx, name).map(|val| (name, val))
            })
            .collect::<Vec<_>>();
        attrs.sort_by_key(|&(name, _)| name);
        for (name, val) in &attrs {
            out.push(' ');
            out.push_str(name);
            out.push_str("=\"");
            c14n_escape_attr(val, out);
            out.push('"');
        }
        out.push('>');

        // Text content
        if let Some(text) = self.direct_text_first(tag_idx) {
            c14n_escape_text(text, out);
        }

        // Children
        for &child in self.child_slice(tag_idx) {
            let child = child as usize;
            self.c14n_element(child, out);
            if let Some(tail) = self.tail_text(child) {
                c14n_escape_text(tail, out);
            }
        }

        // Closing tag (C14N never uses self-closing)
        out.push_str("</");
        out.push_str(tag);
        out.push('>');
    }
}

fn c14n_escape_text(text: &str, out: &mut String) {
    for c in text.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '\r' => out.push_str("&#xD;"),
            _ => out.push(c),
        }
    }
}

fn c14n_escape_attr(text: &str, out: &mut String) {
    for c in text.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '"' => out.push_str("&quot;"),
            '\t' => out.push_str("&#x9;"),
            '\n' => out.push_str("&#xA;"),
            '\r' => out.push_str("&#xD;"),
            _ => out.push(c),
        }
    }
}

// === Free functions for parallel build_indices ===

/// Build CSR child index from tag_types and parents arrays.
pub(crate) fn build_csr_children(
    tag_types: &[TagType],
    parents: &[u32],
    n: usize,
) -> (Vec<u32>, Vec<u32>) {
    let mut child_counts = vec![0u32; n + 1];
    for i in 0..n {
        let tt = tag_types[i];
        if tt == TagType::Close || tt == TagType::CData {
            continue;
        }
        let parent = parents[i];
        if parent != u32::MAX && (parent as usize) < n {
            child_counts[parent as usize] += 1;
        }
    }

    let mut child_offsets = vec![0u32; n + 1];
    for i in 0..n {
        child_offsets[i + 1] = child_offsets[i] + child_counts[i];
    }
    let total_children = child_offsets[n] as usize;
    let mut child_data = vec![0u32; total_children];

    let mut write_pos = child_offsets.clone();
    for i in 0..n {
        let tt = tag_types[i];
        if tt == TagType::Close || tt == TagType::CData {
            continue;
        }
        let parent = parents[i];
        if parent != u32::MAX && (parent as usize) < n {
            let p = parent as usize;
            child_data[write_pos[p] as usize] = i as u32;
            write_pos[p] += 1;
        }
    }

    (child_offsets, child_data)
}

/// Build CSR text child index from text_ranges.
pub(crate) fn build_csr_text_children(
    text_ranges: &[TextRange],
    n: usize,
) -> (Vec<u32>, Vec<u32>) {
    let mut text_counts = vec![0u32; n + 1];
    for range in text_ranges {
        let parent = range.parent_tag;
        if parent != u32::MAX && (parent as usize) < n {
            text_counts[parent as usize] += 1;
        }
    }

    let mut text_child_offsets = vec![0u32; n + 1];
    for i in 0..n {
        text_child_offsets[i + 1] = text_child_offsets[i] + text_counts[i];
    }
    let total_text = text_child_offsets[n] as usize;
    let mut text_child_data = vec![0u32; total_text];

    let mut text_write_pos = text_child_offsets.clone();
    for (ti, range) in text_ranges.iter().enumerate() {
        let parent = range.parent_tag;
        if parent != u32::MAX && (parent as usize) < n {
            let p = parent as usize;
            text_child_data[text_write_pos[p] as usize] = ti as u32;
            text_write_pos[p] += 1;
        }
    }

    (text_child_offsets, text_child_data)
}

/// Fused close_map + post_order in a single pass over tag_types.
/// Both need a stack, so we fuse them to halve the iteration cost.
fn build_close_map_and_post_order(
    tag_types: &[TagType],
    n: usize,
) -> (Vec<u32>, Vec<u32>) {
    let mut close_map = vec![u32::MAX; n];
    let mut post_order = vec![0u32; n];
    let mut stack: Vec<usize> = Vec::new();
    let mut post_counter: u32 = 0;

    for i in 0..n {
        match tag_types[i] {
            TagType::Open => {
                stack.push(i);
            }
            TagType::Close => {
                if let Some(open_idx) = stack.pop() {
                    close_map[open_idx] = i as u32;
                    post_order[open_idx] = post_counter;
                }
                post_order[i] = post_counter;
                post_counter += 1;
            }
            TagType::SelfClose => {
                close_map[i] = i as u32;
                post_order[i] = post_counter;
                post_counter += 1;
            }
            TagType::Comment | TagType::PI | TagType::CData => {
                post_order[i] = post_counter;
                post_counter += 1;
            }
        }
    }

    (close_map, post_order)
}