Skip to main content

xmloxide/xinclude/
mod.rs

1//! `XInclude` 1.0 processing.
2//!
3//! This module implements the [XML Inclusions (XInclude) 1.0](https://www.w3.org/TR/xinclude/)
4//! specification. `XInclude` allows XML documents to reference and include content from
5//! other XML or text resources using `xi:include` elements.
6//!
7//! # Overview
8//!
9//! `XInclude` processing replaces `<xi:include>` elements (in the
10//! `http://www.w3.org/2001/XInclude` namespace) with the content they reference.
11//! The `href` attribute specifies the URI of the resource to include, and the
12//! `parse` attribute determines whether the content is included as parsed XML
13//! (`parse="xml"`, the default) or as a text node (`parse="text"`).
14//!
15//! If a resource cannot be resolved, the processor looks for an `<xi:fallback>`
16//! child element and uses its content instead. If no fallback is provided, the
17//! include is recorded as an error.
18//!
19//! # Design
20//!
21//! Since the core library does not perform I/O, the caller provides a resolver
22//! callback (`Fn(&str) -> Option<String>`) that maps URIs to content. This
23//! allows the library to be used in any environment (filesystem, network,
24//! in-memory test fixtures, etc.).
25
26use std::collections::HashSet;
27use std::fmt;
28
29use crate::tree::{Document, NodeId, NodeKind};
30
/// Namespace URI that identifies `XInclude` markup.
///
/// The processor only treats an element as `xi:include` or `xi:fallback`
/// when its namespace equals this URI; same-named elements in any other
/// namespace (or in no namespace) are left alone.
pub const XINCLUDE_NS: &str = "http://www.w3.org/2001/XInclude";

/// Local (unprefixed) name of the element that requests an inclusion.
const INCLUDE_ELEMENT: &str = "include";

/// Local (unprefixed) name of the element that carries fallback content.
const FALLBACK_ELEMENT: &str = "fallback";
42
/// Tunable settings for an `XInclude` processing pass.
///
/// Passed to [`process_xincludes`]; currently the only knob is the limit on
/// how deeply includes may nest.
///
/// # Examples
///
/// ```
/// use xmloxide::xinclude::XIncludeOptions;
///
/// let opts = XIncludeOptions::default();
/// assert_eq!(opts.max_depth, 50);
/// ```
#[derive(Debug, Clone)]
pub struct XIncludeOptions {
    /// Upper bound on include nesting.
    ///
    /// Included content may itself contain `xi:include` elements, which are
    /// expanded recursively. An include encountered at a depth equal to or
    /// greater than this value is rejected with an error instead of being
    /// expanded. Defaults to 50.
    pub max_depth: usize,
}

impl Default for XIncludeOptions {
    /// Returns options with a nesting limit of 50.
    fn default() -> Self {
        let max_depth = 50;
        Self { max_depth }
    }
}
71
/// An error encountered during `XInclude` processing.
///
/// Errors are collected rather than aborting the pass, so that as many
/// includes as possible are resolved even when some fail.
///
/// The type implements [`std::error::Error`], so it can be boxed, wrapped,
/// or propagated with `?` like any other error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XIncludeError {
    /// Human-readable description of the error.
    pub message: String,
    /// The `href` that caused the error, if applicable.
    pub href: Option<String>,
}

impl fmt::Display for XIncludeError {
    /// Formats the error as `XInclude error for '<href>': <message>` when an
    /// href is known, and as `XInclude error: <message>` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.href {
            Some(href) => write!(f, "XInclude error for '{href}': {}", self.message),
            None => write!(f, "XInclude error: {}", self.message),
        }
    }
}

// No underlying source error is stored, so the default trait methods suffice.
impl std::error::Error for XIncludeError {}
92
93/// Result of `XInclude` processing.
94///
95/// Contains the processed document with all resolvable includes expanded,
96/// along with statistics and any errors encountered.
97pub struct XIncludeResult {
98    /// Number of includes that were successfully processed.
99    pub inclusions: usize,
100    /// Errors encountered during processing.
101    ///
102    /// Each error corresponds to an `xi:include` element that could not
103    /// be resolved and had no usable `xi:fallback`.
104    pub errors: Vec<XIncludeError>,
105}
106
107/// Processes `XInclude` elements in a document.
108///
109/// Walks the document tree looking for elements in the `XInclude` namespace
110/// (`http://www.w3.org/2001/XInclude`) with local name `include`. For each
111/// such element:
112///
113/// 1. The `href` attribute is read to determine the resource URI.
114/// 2. The `parse` attribute is read to determine how to interpret the content
115///    (`"xml"` or `"text"`, defaulting to `"xml"`).
116/// 3. The `resolver` callback is called with the href (minus any fragment) to
117///    obtain the resource content.
118/// 4. On success, the `xi:include` element is replaced with the included content.
119/// 5. On failure, the `xi:fallback` child is used if present; otherwise an error
120///    is recorded.
121///
122/// The resolver callback receives the href string and returns `Some(content)` if
123/// the resource is available, or `None` if it cannot be resolved.
124///
125/// # Circular inclusion detection
126///
127/// The processor tracks which hrefs have been included in the current inclusion
128/// chain and rejects any attempt to include an already-active href, preventing
129/// infinite loops.
130///
131/// # Examples
132///
133/// ```
134/// use xmloxide::Document;
135/// use xmloxide::xinclude::{process_xincludes, XIncludeOptions};
136///
137/// let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude">
138///   <xi:include href="greeting.xml"/>
139/// </doc>"#;
140///
141/// let mut doc = Document::parse_str(xml).unwrap();
142/// let result = process_xincludes(&mut doc, |href| {
143///     match href {
144///         "greeting.xml" => Some("<hello>world</hello>".to_string()),
145///         _ => None,
146///     }
147/// }, &XIncludeOptions::default());
148///
149/// assert_eq!(result.inclusions, 1);
150/// assert!(result.errors.is_empty());
151/// ```
152pub fn process_xincludes<F>(
153    doc: &mut Document,
154    resolver: F,
155    options: &XIncludeOptions,
156) -> XIncludeResult
157where
158    F: Fn(&str) -> Option<String>,
159{
160    let mut state = ProcessingState {
161        inclusions: 0,
162        errors: Vec::new(),
163        active_hrefs: HashSet::new(),
164        max_depth: options.max_depth,
165    };
166
167    process_node(doc, doc.root(), &resolver, &mut state, 0);
168
169    XIncludeResult {
170        inclusions: state.inclusions,
171        errors: state.errors,
172    }
173}
174
175/// Internal mutable state carried through the `XInclude` processing pass.
176struct ProcessingState {
177    /// Number of successfully processed includes.
178    inclusions: usize,
179    /// Accumulated errors.
180    errors: Vec<XIncludeError>,
181    /// Set of hrefs currently in the inclusion chain (for cycle detection).
182    active_hrefs: HashSet<String>,
183    /// Maximum allowed nesting depth.
184    max_depth: usize,
185}
186
187/// Recursively processes `XInclude` elements under the given node.
188///
189/// We collect the list of children first (as a `Vec<NodeId>`) to avoid
190/// borrowing issues while mutating the document.
191fn process_node<F>(
192    doc: &mut Document,
193    node: NodeId,
194    resolver: &F,
195    state: &mut ProcessingState,
196    depth: usize,
197) where
198    F: Fn(&str) -> Option<String>,
199{
200    // Collect children before iteration, since we may mutate the tree.
201    let children: Vec<NodeId> = doc.children(node).collect();
202
203    for child in children {
204        if is_xinclude_element(doc, child) {
205            process_include_element(doc, child, resolver, state, depth);
206        } else {
207            // Recurse into non-include elements to find nested xi:include.
208            process_node(doc, child, resolver, state, depth);
209        }
210    }
211}
212
213/// Checks whether a node is an `xi:include` element in the `XInclude` namespace.
214fn is_xinclude_element(doc: &Document, node: NodeId) -> bool {
215    if let NodeKind::Element {
216        name, namespace, ..
217    } = &doc.node(node).kind
218    {
219        name == INCLUDE_ELEMENT && namespace.as_deref() == Some(XINCLUDE_NS)
220    } else {
221        false
222    }
223}
224
225/// Checks whether a node is an `xi:fallback` element in the `XInclude` namespace.
226fn is_fallback_element(doc: &Document, node: NodeId) -> bool {
227    if let NodeKind::Element {
228        name, namespace, ..
229    } = &doc.node(node).kind
230    {
231        name == FALLBACK_ELEMENT && namespace.as_deref() == Some(XINCLUDE_NS)
232    } else {
233        false
234    }
235}
236
237/// Processes a single `xi:include` element.
238///
239/// Reads the `href` and `parse` attributes, resolves the content via the
240/// resolver, and replaces the `xi:include` element with the result.
241fn process_include_element<F>(
242    doc: &mut Document,
243    include_node: NodeId,
244    resolver: &F,
245    state: &mut ProcessingState,
246    depth: usize,
247) where
248    F: Fn(&str) -> Option<String>,
249{
250    // Read attributes from the xi:include element.
251    let href = doc.attribute(include_node, "href").map(str::to_owned);
252    let parse = doc
253        .attribute(include_node, "parse")
254        .unwrap_or("xml")
255        .to_owned();
256
257    // Validate: href is required.
258    let Some(href) = href else {
259        state.errors.push(XIncludeError {
260            message: "xi:include element is missing required 'href' attribute".to_string(),
261            href: None,
262        });
263        // Remove the xi:include element.
264        doc.detach(include_node);
265        return;
266    };
267
268    // Validate: parse must be "xml" or "text".
269    if parse != "xml" && parse != "text" {
270        state.errors.push(XIncludeError {
271            message: format!("invalid parse attribute value '{parse}'; expected 'xml' or 'text'"),
272            href: Some(href),
273        });
274        doc.detach(include_node);
275        return;
276    }
277
278    // Check depth limit.
279    if depth >= state.max_depth {
280        state.errors.push(XIncludeError {
281            message: format!(
282                "maximum XInclude nesting depth ({}) exceeded",
283                state.max_depth
284            ),
285            href: Some(href),
286        });
287        doc.detach(include_node);
288        return;
289    }
290
291    // Strip fragment identifier for resolution (but keep it for potential
292    // XPointer processing later).
293    let (base_href, _fragment) = split_fragment(&href);
294
295    // Check for circular inclusion.
296    if state.active_hrefs.contains(base_href) {
297        state.errors.push(XIncludeError {
298            message: "circular inclusion detected".to_string(),
299            href: Some(href),
300        });
301        doc.detach(include_node);
302        return;
303    }
304
305    // Resolve the resource.
306    let content = resolver(base_href);
307
308    match content {
309        Some(content) => {
310            // Mark this href as active in the inclusion chain.
311            state.active_hrefs.insert(base_href.to_owned());
312
313            let success = match parse.as_str() {
314                "xml" => process_xml_include(doc, include_node, &content, resolver, state, depth),
315                "text" => process_text_include(doc, include_node, &content),
316                _ => false, // Already validated above.
317            };
318
319            // Remove from active set after processing.
320            state.active_hrefs.remove(base_href);
321
322            if success {
323                state.inclusions += 1;
324            }
325        }
326        None => {
327            // Resource not found — try fallback.
328            if !try_fallback(doc, include_node, resolver, state, depth) {
329                state.errors.push(XIncludeError {
330                    message: "resource not found and no xi:fallback provided".to_string(),
331                    href: Some(href),
332                });
333                doc.detach(include_node);
334            }
335        }
336    }
337}
338
339/// Processes an XML include: parses the content as XML and replaces the
340/// `xi:include` element with the parsed children.
341///
342/// Returns `true` on success.
343fn process_xml_include<F>(
344    doc: &mut Document,
345    include_node: NodeId,
346    content: &str,
347    resolver: &F,
348    state: &mut ProcessingState,
349    depth: usize,
350) -> bool
351where
352    F: Fn(&str) -> Option<String>,
353{
354    // Parse the included content as an XML document.
355    let included_doc = match Document::parse_str(content) {
356        Ok(d) => d,
357        Err(e) => {
358            // Parse failure — try fallback, otherwise record error.
359            if try_fallback(doc, include_node, resolver, state, depth) {
360                return false;
361            }
362            state.errors.push(XIncludeError {
363                message: format!("failed to parse included XML: {e}"),
364                href: None,
365            });
366            doc.detach(include_node);
367            return false;
368        }
369    };
370
371    // Copy nodes from the included document into the main document.
372    // We need to deep-copy because the nodes live in a different arena.
373    let included_root = included_doc.root();
374    let included_children: Vec<NodeId> = included_doc.children(included_root).collect();
375
376    // Get the parent of the xi:include element so we can insert siblings.
377    let parent = doc.parent(include_node);
378
379    // Insert each child of the included document's root before the
380    // xi:include element, then remove the xi:include element.
381    let mut inserted_nodes = Vec::new();
382    for inc_child in &included_children {
383        let new_node = deep_copy_node(doc, &included_doc, *inc_child);
384        inserted_nodes.push(new_node);
385    }
386
387    // Insert all new nodes before the include element.
388    for new_node in &inserted_nodes {
389        doc.insert_before(include_node, *new_node);
390    }
391
392    // Detach and discard the xi:include element.
393    doc.detach(include_node);
394
395    // Recursively process XInclude elements in the newly inserted content.
396    if parent.is_some() {
397        // We only need to process the newly inserted nodes.
398        for new_node in inserted_nodes {
399            process_node(doc, new_node, resolver, state, depth + 1);
400        }
401    }
402
403    true
404}
405
406/// Processes a text include: creates a text node with the content and replaces
407/// the `xi:include` element.
408///
409/// Returns `true` on success.
410fn process_text_include(doc: &mut Document, include_node: NodeId, content: &str) -> bool {
411    let text_node = doc.create_node(NodeKind::Text {
412        content: content.to_string(),
413    });
414
415    doc.insert_before(include_node, text_node);
416    doc.detach(include_node);
417
418    true
419}
420
421/// Tries to use an `xi:fallback` child of the include element.
422///
423/// If a fallback is found, its children are moved to replace the `xi:include`
424/// element. Returns `true` if a fallback was found and applied.
425fn try_fallback<F>(
426    doc: &mut Document,
427    include_node: NodeId,
428    resolver: &F,
429    state: &mut ProcessingState,
430    depth: usize,
431) -> bool
432where
433    F: Fn(&str) -> Option<String>,
434{
435    // Find the first xi:fallback child.
436    let fallback_node = {
437        let children: Vec<NodeId> = doc.children(include_node).collect();
438        children
439            .into_iter()
440            .find(|&child| is_fallback_element(doc, child))
441    };
442
443    let Some(fallback) = fallback_node else {
444        return false;
445    };
446
447    // Collect the fallback's children.
448    let fallback_children: Vec<NodeId> = doc.children(fallback).collect();
449
450    // Detach each fallback child and insert before the xi:include element.
451    let mut inserted_nodes = Vec::new();
452    for child in fallback_children {
453        doc.detach(child);
454        doc.insert_before(include_node, child);
455        inserted_nodes.push(child);
456    }
457
458    // Remove the xi:include element (which still contains the now-empty fallback).
459    doc.detach(include_node);
460
461    // Recursively process the inserted fallback content.
462    for node in inserted_nodes {
463        process_node(doc, node, resolver, state, depth + 1);
464    }
465
466    true
467}
468
469/// Deep-copies a node (and all its descendants) from one document's arena
470/// into another.
471///
472/// This is necessary because nodes in different `Document`s live in separate
473/// arenas and cannot share `NodeId`s.
474fn deep_copy_node(target: &mut Document, source: &Document, source_id: NodeId) -> NodeId {
475    let source_node = source.node(source_id);
476    let new_id = target.create_node(source_node.kind.clone());
477
478    // Recursively copy children.
479    let children: Vec<NodeId> = source.children(source_id).collect();
480    for child_id in children {
481        let new_child = deep_copy_node(target, source, child_id);
482        target.append_child(new_id, new_child);
483    }
484
485    new_id
486}
487
/// Separates a URI reference into its resource part and its fragment.
///
/// The split happens at the first `#`, which is not included in either half:
/// `"file.xml#section1"` yields `("file.xml", Some("section1"))`. A trailing
/// `#` yields an empty fragment (`Some("")`), and input without any `#` is
/// returned unchanged with `None`.
fn split_fragment(href: &str) -> (&str, Option<&str>) {
    match href.split_once('#') {
        Some((base, fragment)) => (base, Some(fragment)),
        None => (href, None),
    }
}
501
502#[cfg(test)]
503#[allow(clippy::unwrap_used)]
504mod tests {
505    use super::*;
506
507    // Helper: parse XML, process XIncludes with the given resolver, return the
508    // document and result.
509    fn process_with_resolver<F>(xml: &str, resolver: F) -> (Document, XIncludeResult)
510    where
511        F: Fn(&str) -> Option<String>,
512    {
513        let mut doc = Document::parse_str(xml).unwrap();
514        let result = process_xincludes(&mut doc, resolver, &XIncludeOptions::default());
515        (doc, result)
516    }
517
    // Helper: return the concatenated text content of the root element for comparison.
519    fn doc_text_content(doc: &Document) -> String {
520        let root_elem = doc.root_element().unwrap();
521        doc.text_content(root_elem)
522    }
523
524    #[test]
525    fn test_basic_xml_include() {
526        let xml =
527            r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="inc.xml"/></doc>"#;
528        let (doc, result) = process_with_resolver(xml, |href| match href {
529            "inc.xml" => Some("<greeting>hello</greeting>".to_string()),
530            _ => None,
531        });
532
533        assert_eq!(result.inclusions, 1);
534        assert!(result.errors.is_empty());
535
536        // The included <greeting> element should be a child of <doc>.
537        let root = doc.root_element().unwrap();
538        let children: Vec<NodeId> = doc.children(root).collect();
539        assert_eq!(children.len(), 1);
540        assert_eq!(doc.node_name(children[0]), Some("greeting"));
541        assert_eq!(doc.text_content(children[0]), "hello");
542    }
543
544    #[test]
545    fn test_basic_text_include() {
546        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="msg.txt" parse="text"/></doc>"#;
547        let (doc, result) = process_with_resolver(xml, |href| match href {
548            "msg.txt" => Some("Hello, World!".to_string()),
549            _ => None,
550        });
551
552        assert_eq!(result.inclusions, 1);
553        assert!(result.errors.is_empty());
554        assert_eq!(doc_text_content(&doc), "Hello, World!");
555    }
556
557    #[test]
558    fn test_fallback_when_resource_not_found() {
559        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="missing.xml"><xi:fallback><alt>fallback content</alt></xi:fallback></xi:include></doc>"#;
560        let (doc, result) = process_with_resolver(xml, |_| None);
561
562        assert_eq!(result.inclusions, 0);
563        assert!(result.errors.is_empty());
564
565        let root = doc.root_element().unwrap();
566        let children: Vec<NodeId> = doc.children(root).collect();
567        assert_eq!(children.len(), 1);
568        assert_eq!(doc.node_name(children[0]), Some("alt"));
569        assert_eq!(doc.text_content(children[0]), "fallback content");
570    }
571
572    #[test]
573    fn test_fallback_with_text_content() {
574        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="missing.xml"><xi:fallback>plain fallback</xi:fallback></xi:include></doc>"#;
575        let (doc, result) = process_with_resolver(xml, |_| None);
576
577        assert_eq!(result.inclusions, 0);
578        assert!(result.errors.is_empty());
579        assert_eq!(doc_text_content(&doc), "plain fallback");
580    }
581
582    #[test]
583    fn test_missing_href_attribute() {
584        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include/></doc>"#;
585        let (_doc, result) = process_with_resolver(xml, |_| None);
586
587        assert_eq!(result.inclusions, 0);
588        assert_eq!(result.errors.len(), 1);
589        assert!(result.errors[0].message.contains("missing required 'href'"));
590        assert!(result.errors[0].href.is_none());
591    }
592
593    #[test]
594    fn test_circular_inclusion_detection() {
        // "a.xml" includes itself, which is the shortest possible inclusion cycle.
596        let xml =
597            r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="a.xml"/></doc>"#;
598        let (_, result) = process_with_resolver(xml, |href| match href {
599            "a.xml" => Some(
600                r#"<a xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="a.xml"/></a>"#
601                    .to_string(),
602            ),
603            _ => None,
604        });
605
606        // The first include succeeds, the second (circular) fails.
607        assert_eq!(result.inclusions, 1);
608        assert_eq!(result.errors.len(), 1);
609        assert!(result.errors[0].message.contains("circular inclusion"));
610    }
611
612    #[test]
613    fn test_max_depth_exceeded() {
614        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="deep.xml"/></doc>"#;
615        let mut doc = Document::parse_str(xml).unwrap();
616        let opts = XIncludeOptions { max_depth: 2 };
617
618        // Each level includes another level.
619        let result = process_xincludes(
620            &mut doc,
621            |href| {
622                match href {
623                "deep.xml" => Some(
624                    r#"<level xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="deeper.xml"/></level>"#
625                        .to_string(),
626                ),
627                "deeper.xml" => Some(
628                    r#"<level xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="deepest.xml"/></level>"#
629                        .to_string(),
630                ),
631                "deepest.xml" => Some("<leaf/>".to_string()),
632                _ => None,
633            }
634            },
635            &opts,
636        );
637
638        // depth 0 -> deep.xml succeeds, depth 1 -> deeper.xml succeeds,
639        // depth 2 -> deepest.xml exceeds max_depth=2.
640        assert!(result.errors.iter().any(|e| e.message.contains("depth")));
641    }
642
643    #[test]
644    fn test_multiple_includes_in_same_document() {
645        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="a.xml"/><xi:include href="b.xml"/></doc>"#;
646        let (doc, result) = process_with_resolver(xml, |href| match href {
647            "a.xml" => Some("<first/>".to_string()),
648            "b.xml" => Some("<second/>".to_string()),
649            _ => None,
650        });
651
652        assert_eq!(result.inclusions, 2);
653        assert!(result.errors.is_empty());
654
655        let root = doc.root_element().unwrap();
656        let children: Vec<NodeId> = doc.children(root).collect();
657        assert_eq!(children.len(), 2);
658        assert_eq!(doc.node_name(children[0]), Some("first"));
659        assert_eq!(doc.node_name(children[1]), Some("second"));
660    }
661
662    #[test]
663    fn test_nested_includes() {
664        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="outer.xml"/></doc>"#;
665        let (doc, result) = process_with_resolver(xml, |href| {
666            match href {
667            "outer.xml" => Some(
668                r#"<outer xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="inner.xml"/></outer>"#
669                    .to_string(),
670            ),
671            "inner.xml" => Some("<inner>nested</inner>".to_string()),
672            _ => None,
673        }
674        });
675
676        assert_eq!(result.inclusions, 2);
677        assert!(result.errors.is_empty());
678
679        let root = doc.root_element().unwrap();
680        let outer: Vec<NodeId> = doc.children(root).collect();
681        assert_eq!(doc.node_name(outer[0]), Some("outer"));
682
683        let inner: Vec<NodeId> = doc.children(outer[0]).collect();
684        assert_eq!(doc.node_name(inner[0]), Some("inner"));
685        assert_eq!(doc.text_content(inner[0]), "nested");
686    }
687
688    #[test]
689    fn test_default_parse_attribute_is_xml() {
690        // When parse is not specified, it defaults to "xml".
691        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="data.xml"/></doc>"#;
692        let (doc, result) = process_with_resolver(xml, |href| match href {
693            "data.xml" => Some("<item>value</item>".to_string()),
694            _ => None,
695        });
696
697        assert_eq!(result.inclusions, 1);
698        assert!(result.errors.is_empty());
699
700        let root = doc.root_element().unwrap();
701        let children: Vec<NodeId> = doc.children(root).collect();
702        assert_eq!(doc.node_name(children[0]), Some("item"));
703    }
704
705    #[test]
706    fn test_include_replaces_entire_xi_include_element() {
707        // Verify that the xi:include element itself is completely removed.
708        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><before/><xi:include href="mid.xml"/><after/></doc>"#;
709        let (doc, result) = process_with_resolver(xml, |href| match href {
710            "mid.xml" => Some("<middle/>".to_string()),
711            _ => None,
712        });
713
714        assert_eq!(result.inclusions, 1);
715
716        let root = doc.root_element().unwrap();
717        let names: Vec<Option<&str>> = doc.children(root).map(|c| doc.node_name(c)).collect();
718        assert_eq!(names, vec![Some("before"), Some("middle"), Some("after")]);
719    }
720
721    #[test]
722    fn test_text_include_preserves_whitespace() {
723        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="ws.txt" parse="text"/></doc>"#;
724        let content = "  line1\n  line2\n";
725        let (doc, result) = process_with_resolver(xml, |href| match href {
726            "ws.txt" => Some(content.to_string()),
727            _ => None,
728        });
729
730        assert_eq!(result.inclusions, 1);
731        assert_eq!(doc_text_content(&doc), content);
732    }
733
734    #[test]
735    fn test_empty_include_content() {
        // An empty text resource still counts as a successful inclusion.
737        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="empty.txt" parse="text"/></doc>"#;
738        let (doc, result) = process_with_resolver(xml, |href| match href {
739            "empty.txt" => Some(String::new()),
740            _ => None,
741        });
742
743        assert_eq!(result.inclusions, 1);
744        assert!(result.errors.is_empty());
745        assert_eq!(doc_text_content(&doc), "");
746    }
747
748    #[test]
749    fn test_include_with_fragment_identifier() {
750        // Fragment identifiers are stripped for resolution; the base href
751        // is used to fetch the content.
752        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="data.xml#section1"/></doc>"#;
753        let (doc, result) = process_with_resolver(xml, |href| match href {
754            "data.xml" => Some("<section>content</section>".to_string()),
755            _ => None,
756        });
757
758        assert_eq!(result.inclusions, 1);
759        assert!(result.errors.is_empty());
760
761        let root = doc.root_element().unwrap();
762        let children: Vec<NodeId> = doc.children(root).collect();
763        assert_eq!(doc.node_name(children[0]), Some("section"));
764    }
765
766    #[test]
767    fn test_xinclude_namespace_detection() {
768        // An "include" element NOT in the XInclude namespace should be ignored.
769        let xml = r#"<doc><include href="should-ignore.xml"/></doc>"#;
770        let (_, result) = process_with_resolver(xml, |_| {
771            panic!("resolver should not be called for non-XInclude elements");
772        });
773
774        assert_eq!(result.inclusions, 0);
775        assert!(result.errors.is_empty());
776    }
777
778    #[test]
779    fn test_split_fragment() {
780        assert_eq!(split_fragment("file.xml#sec"), ("file.xml", Some("sec")));
781        assert_eq!(split_fragment("file.xml"), ("file.xml", None));
782        assert_eq!(split_fragment("file.xml#"), ("file.xml", Some("")));
783        assert_eq!(split_fragment("#frag"), ("", Some("frag")));
784    }
785
786    #[test]
787    fn test_no_fallback_records_error() {
788        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="nope.xml"/></doc>"#;
789        let (_, result) = process_with_resolver(xml, |_| None);
790
791        assert_eq!(result.inclusions, 0);
792        assert_eq!(result.errors.len(), 1);
793        assert!(result.errors[0].message.contains("resource not found"));
794        assert_eq!(result.errors[0].href.as_deref(), Some("nope.xml"));
795    }
796
797    #[test]
798    fn test_invalid_parse_attribute() {
799        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="x.xml" parse="json"/></doc>"#;
800        let (_, result) = process_with_resolver(xml, |_| None);
801
802        assert_eq!(result.errors.len(), 1);
803        assert!(result.errors[0].message.contains("invalid parse attribute"));
804    }
805
806    #[test]
807    fn test_xml_include_with_wrapper_element() {
808        // Included document has a root element with multiple children.
809        let xml = r#"<doc xmlns:xi="http://www.w3.org/2001/XInclude"><xi:include href="multi.xml"/></doc>"#;
810        let (doc, result) = process_with_resolver(xml, |href| match href {
811            "multi.xml" => Some("<wrapper><first/><second/></wrapper>".to_string()),
812            _ => None,
813        });
814
815        assert_eq!(result.inclusions, 1);
816        assert!(result.errors.is_empty());
817
818        let root = doc.root_element().unwrap();
819        let children: Vec<NodeId> = doc.children(root).collect();
820        // The <wrapper> element is inserted as a child of <doc>.
821        assert_eq!(children.len(), 1);
822        assert_eq!(doc.node_name(children[0]), Some("wrapper"));
823
824        let wrapper_children: Vec<NodeId> = doc.children(children[0]).collect();
825        assert_eq!(wrapper_children.len(), 2);
826        assert_eq!(doc.node_name(wrapper_children[0]), Some("first"));
827        assert_eq!(doc.node_name(wrapper_children[1]), Some("second"));
828    }
829
830    #[test]
831    fn test_options_default() {
832        let opts = XIncludeOptions::default();
833        assert_eq!(opts.max_depth, 50);
834    }
835
836    #[test]
837    fn test_error_display() {
838        let err = XIncludeError {
839            message: "resource not found".to_string(),
840            href: Some("file.xml".to_string()),
841        };
842        assert_eq!(
843            err.to_string(),
844            "XInclude error for 'file.xml': resource not found"
845        );
846
847        let err_no_href = XIncludeError {
848            message: "bad element".to_string(),
849            href: None,
850        };
851        assert_eq!(err_no_href.to_string(), "XInclude error: bad element");
852    }
853}