Skip to main content

xsd_schema/validation/
hint_loader.rs

1//! Schema location hint loader.
2//!
3//! A helper that takes accumulated `xsi:schemaLocation` and
4//! `xsi:noNamespaceSchemaLocation` hints from a validation run and attempts
5//! to load the referenced schemas into a [`SchemaSetBuilder`] for
6//! recompilation.
7//!
8//! # Design
9//!
10//! `process_loaded_schemas()` is a whole-set compile pass that is not
11//! idempotent. Therefore this helper works with [`SchemaSetBuilder`]
12//! (pre-compile), not with an already-compiled [`SchemaSet`]. The caller
13//! adds their base schemas, enriches with hints, then compiles once.
14//!
15//! URI resolution is delegated to the builder's [`SchemaResolver`] so that
16//! Windows paths, URL normalization, and other platform-specific handling
17//! are applied consistently.
18//!
19//! # Example
20//!
21//! ```ignore
22//! // 1. Validate and collect hints
23//! let sl_hints = runtime.schema_location_hints().to_vec();
24//! let nnsl_hints = runtime.no_namespace_schema_location_hints().to_vec();
25//!
26//! // 2. Build enriched schema set
27//! let mut builder = SchemaSetBuilder::new();
28//! builder.try_add("base.xsd")?;
29//! load_hints_into_builder(&mut builder, &sl_hints, &nnsl_hints);
30//! let compiled = builder.compile()?;
31//! ```
32
33use super::info::{NoNamespaceSchemaLocationHint, SchemaLocationHint};
34use crate::builder::SchemaSetBuilder;
35use crate::error::SchemaError;
36
37/// Result of hint-driven schema loading.
38#[derive(Debug, Default)]
39pub struct HintLoadResult {
40    /// Number of schemas freshly loaded from hints.
41    pub loaded_count: usize,
42    /// Number of hints skipped (already loaded, load failure, etc.).
43    pub skipped_count: usize,
44    /// Errors encountered during loading (non-fatal — partial success is possible).
45    pub errors: Vec<SchemaError>,
46}
47
48/// Enrich a [`SchemaSetBuilder`] with schemas discovered from
49/// `xsi:schemaLocation` and `xsi:noNamespaceSchemaLocation` hints
50/// collected during a validation run.
51///
52/// Each hint carries its own base URI (from the instance document) so
53/// that relative schema locations are resolved correctly. URI resolution
54/// is performed by the builder's [`SchemaResolver`], which handles
55/// platform-specific paths and URL normalization.
56///
57/// Schemas that are already loaded in the builder are silently skipped
58/// and counted in [`HintLoadResult::skipped_count`].
59/// Load/network failures are non-fatal and collected in
60/// [`HintLoadResult::errors`].
61///
62/// The builder must NOT yet be compiled. After calling this, the caller
63/// should call `builder.compile()` to produce the final compiled schema set.
64pub fn load_hints_into_builder(
65    builder: &mut SchemaSetBuilder,
66    schema_location_hints: &[SchemaLocationHint],
67    no_namespace_hints: &[NoNamespaceSchemaLocationHint],
68) -> HintLoadResult {
69    let mut result = HintLoadResult::default();
70
71    for hint in schema_location_hints {
72        try_load_hint(builder, &hint.location, &hint.base_uri, &mut result);
73    }
74
75    for hint in no_namespace_hints {
76        try_load_hint(builder, &hint.location, &hint.base_uri, &mut result);
77    }
78
79    result
80}
81
82fn try_load_hint(
83    builder: &mut SchemaSetBuilder,
84    location: &str,
85    base_uri: &str,
86    result: &mut HintLoadResult,
87) {
88    match builder.try_add_relative(location, base_uri) {
89        Ok(true) => {
90            result.loaded_count += 1;
91        }
92        Ok(false) => {
93            // Already loaded — dedup skip
94            result.skipped_count += 1;
95        }
96        Err(e) => {
97            result.errors.push(e);
98            result.skipped_count += 1;
99        }
100    }
101}
102
103/// Outcome of [`enrich_schema_set`].
104///
105/// `schema_set` is `Some` only when the recompile succeeded. The other
106/// fields surface diagnostics that the previous `Option<SchemaSet>` API
107/// silently dropped:
108///
109/// - `hint_errors` — per-hint load failures (network, missing file,
110///   relative-URI resolution, ...). Always non-fatal.
111/// - `compile_error` — the `SchemaError` from recompiling the enriched
112///   builder, if recompilation failed. When this is `Some`, `schema_set`
113///   is always `None`.
114///
115/// The `is_no_op` helper distinguishes "no hints to apply" from
116/// "tried but failed".
117#[derive(Debug, Default)]
118pub struct EnrichmentOutcome {
119    /// Compiled enriched schema set, if recompile succeeded.
120    pub schema_set: Option<crate::schema::SchemaSet>,
121    /// Errors from individual hint loads (non-fatal, partial success
122    /// is possible).
123    pub hint_errors: Vec<SchemaError>,
124    /// Recompile error after the enriched builder was assembled, if any.
125    pub compile_error: Option<SchemaError>,
126}
127
128impl EnrichmentOutcome {
129    /// Returns `true` when no hints were provided — enrichment was a
130    /// no-op rather than a failure.
131    pub fn is_no_op(&self) -> bool {
132        self.schema_set.is_none()
133            && self.hint_errors.is_empty()
134            && self.compile_error.is_none()
135    }
136
137    /// Returns the enriched [`SchemaSet`] if available, otherwise the
138    /// original. Convenient when you want to "use enriched if it
139    /// worked, fall back to the original".
140    pub fn schema_set_or<'a>(
141        &'a self,
142        original: &'a crate::schema::SchemaSet,
143    ) -> &'a crate::schema::SchemaSet {
144        self.schema_set.as_ref().unwrap_or(original)
145    }
146}
147
148/// Build an enriched [`SchemaSet`] by re-loading the original schemas and
149/// adding any `xsi:schemaLocation` / `xsi:noNamespaceSchemaLocation` hints
150/// collected during a validation run.
151///
152/// Returns an [`EnrichmentOutcome`] describing the result. The compiled
153/// `schema_set` is populated only when at least one hint was supplied
154/// **and** the recompile succeeded; in every other case
155/// `schema_set` is `None` and `hint_errors` / `compile_error` describe
156/// what happened.
157///
158/// This is the recommended way to handle schema-location hints without
159/// manually tracking original schema file paths:
160///
161/// ```rust,ignore
162/// // After first validation pass:
163/// let sl = runtime.schema_location_hints().to_vec();
164/// let nnsl = runtime.no_namespace_schema_location_hints().to_vec();
165///
166/// let outcome = enrich_schema_set(&schema_set, &sl, &nnsl);
167/// if let Some(enriched) = outcome.schema_set.as_ref() {
168///     // Re-validate with enriched schema set
169/// } else if let Some(err) = outcome.compile_error.as_ref() {
170///     eprintln!("hint enrichment recompile failed: {err}");
171/// }
172/// ```
173pub fn enrich_schema_set(
174    original: &crate::schema::SchemaSet,
175    schema_location_hints: &[SchemaLocationHint],
176    no_namespace_hints: &[NoNamespaceSchemaLocationHint],
177) -> EnrichmentOutcome {
178    if schema_location_hints.is_empty() && no_namespace_hints.is_empty() {
179        return EnrichmentOutcome::default();
180    }
181
182    let mut builder = if original.xsd_version == crate::schema::model::XsdVersion::V1_1 {
183        SchemaSetBuilder::xsd11()
184    } else {
185        SchemaSetBuilder::new()
186    };
187
188    builder.add_from(original);
189    let hint_result =
190        load_hints_into_builder(&mut builder, schema_location_hints, no_namespace_hints);
191
192    match builder.compile() {
193        Ok(compiled) => EnrichmentOutcome {
194            schema_set: Some(compiled.into_schema_set()),
195            hint_errors: hint_result.errors,
196            compile_error: None,
197        },
198        Err(e) => EnrichmentOutcome {
199            schema_set: None,
200            hint_errors: hint_result.errors,
201            compile_error: Some(e),
202        },
203    }
204}
205
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210    use crate::builder::SchemaSetBuilder;
211    use crate::validation::info::{NoNamespaceSchemaLocationHint, SchemaLocationHint};
212
213    #[test]
214    fn test_load_hints_empty() {
215        let mut builder = SchemaSetBuilder::new();
216        let result = load_hints_into_builder(&mut builder, &[], &[]);
217        assert_eq!(result.loaded_count, 0);
218        assert_eq!(result.skipped_count, 0);
219        assert!(result.errors.is_empty());
220    }
221
222    #[test]
223    fn test_load_hints_nonexistent_file_is_nonfatal() {
224        let mut builder = SchemaSetBuilder::new();
225        let hints = vec![SchemaLocationHint {
226            namespace: "urn:test".to_string(),
227            location: "nonexistent_schema_abc123.xsd".to_string(),
228            base_uri: String::new(),
229        }];
230        let result = load_hints_into_builder(&mut builder, &hints, &[]);
231        assert_eq!(result.loaded_count, 0);
232        assert_eq!(result.skipped_count, 1);
233        assert_eq!(result.errors.len(), 1);
234    }
235
236    #[test]
237    fn test_load_no_namespace_hints_nonexistent_is_nonfatal() {
238        let mut builder = SchemaSetBuilder::new();
239        let hints = vec![NoNamespaceSchemaLocationHint {
240            location: "nonexistent_schema_abc123.xsd".to_string(),
241            base_uri: String::new(),
242        }];
243        let result = load_hints_into_builder(&mut builder, &[], &hints);
244        assert_eq!(result.loaded_count, 0);
245        assert_eq!(result.skipped_count, 1);
246        assert_eq!(result.errors.len(), 1);
247    }
248
249    #[test]
250    fn test_duplicate_hints_counted_as_skipped() {
251        // Load a real schema, then try to load it again via a duplicate hint.
252        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
253            <xs:element name="root" type="xs:string"/>
254        </xs:schema>"#;
255        let mut builder = SchemaSetBuilder::new()
256            .add_source(xsd, "http://example.com/dedup.xsd")
257            .unwrap();
258
259        // Hint pointing to the same location should be skipped, not loaded again.
260        // try_add normalizes, and add_source records the exact base_uri —
261        // use the same absolute URI so normalization matches.
262        let hints = vec![SchemaLocationHint {
263            namespace: "".to_string(),
264            location: "http://example.com/dedup.xsd".to_string(),
265            base_uri: String::new(),
266        }];
267        let result = load_hints_into_builder(&mut builder, &hints, &[]);
268        assert_eq!(result.loaded_count, 0, "duplicate should not be loaded");
269        assert_eq!(result.skipped_count, 1, "duplicate should be skipped");
270        // The hint loader may produce an error if the resolver can't
271        // re-fetch the URL, but the is_loaded check should prevent that.
272        // The key assertion: it was not double-loaded.
273    }
274
275    #[test]
276    fn test_add_source_normalizes_for_dedup() {
277        // add_source with a relative path should normalize to the same
278        // absolute load key that a later hint resolves to.
279        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
280            <xs:element name="root" type="xs:string"/>
281        </xs:schema>"#;
282        let cwd = std::env::current_dir().unwrap();
283        let mut builder = SchemaSetBuilder::new()
284            .add_source(xsd, "schemas/test.xsd")
285            .unwrap();
286
287        let instance_base = cwd
288            .join("schemas")
289            .join("instance.xml")
290            .to_string_lossy()
291            .into_owned();
292        let hints = vec![SchemaLocationHint {
293            namespace: "".to_string(),
294            location: "test.xsd".to_string(),
295            base_uri: instance_base,
296        }];
297        let result = load_hints_into_builder(&mut builder, &hints, &[]);
298        assert_eq!(
299            result.loaded_count, 0,
300            "hint resolving to already-loaded URI should not reload"
301        );
302        assert_eq!(result.skipped_count, 1);
303    }
304
305    #[test]
306    fn test_enrich_schema_set_no_hints_is_no_op() {
307        let xsd = r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
308            <xs:element name="root" type="xs:string"/>
309        </xs:schema>"#;
310        let compiled = SchemaSetBuilder::new()
311            .add_source(xsd, "test.xsd")
312            .unwrap()
313            .compile()
314            .unwrap();
315
316        let outcome = enrich_schema_set(compiled.schema_set(), &[], &[]);
317        assert!(
318            outcome.is_no_op(),
319            "should be a no-op when no hints are provided"
320        );
321        assert!(outcome.schema_set.is_none());
322        assert!(outcome.compile_error.is_none());
323        assert!(outcome.hint_errors.is_empty());
324    }
325
326    #[test]
327    fn test_enrich_schema_set_preserves_original_elements() {
328        // Write a temp schema file so add_from can re-load from disk.
329        let dir = std::env::temp_dir().join("xsd_hint_test_enrich");
330        let _ = std::fs::create_dir_all(&dir);
331        let schema_path = dir.join("base.xsd");
332        std::fs::write(
333            &schema_path,
334            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
335            <xs:element name="root" type="xs:string"/>
336        </xs:schema>"#,
337        )
338        .unwrap();
339
340        let compiled = SchemaSetBuilder::new()
341            .add("", &schema_path.to_string_lossy())
342            .unwrap()
343            .compile()
344            .unwrap();
345        let original = compiled.schema_set();
346
347        // Provide a hint that fails to load — enrichment should still
348        // succeed because add_from re-loaded the original schema, and
349        // the hint failure is surfaced in `hint_errors`.
350        let hints = vec![SchemaLocationHint {
351            namespace: "urn:test".to_string(),
352            location: "nonexistent_42.xsd".to_string(),
353            base_uri: String::new(),
354        }];
355
356        let outcome = enrich_schema_set(original, &hints, &[]);
357        assert!(
358            outcome.schema_set.is_some(),
359            "should return Some even if hint fails"
360        );
361        assert!(
362            !outcome.hint_errors.is_empty(),
363            "hint load failure must be surfaced in hint_errors"
364        );
365        assert!(
366            outcome.compile_error.is_none(),
367            "recompile of the original schemas should still succeed"
368        );
369
370        let enriched = outcome.schema_set.unwrap();
371        let name = enriched.name_table.add("root");
372        assert!(
373            enriched.lookup_element(None, name).is_some(),
374            "original element 'root' should still be present after enrichment"
375        );
376
377        let _ = std::fs::remove_dir_all(&dir);
378    }
379
380    #[test]
381    fn test_enrich_schema_set_preserves_xsd_version() {
382        let dir = std::env::temp_dir().join("xsd_hint_test_version");
383        let _ = std::fs::create_dir_all(&dir);
384        let schema_path = dir.join("test.xsd");
385        std::fs::write(
386            &schema_path,
387            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
388            <xs:element name="root" type="xs:string"/>
389        </xs:schema>"#,
390        )
391        .unwrap();
392
393        let compiled = SchemaSetBuilder::xsd11()
394            .add("", &schema_path.to_string_lossy())
395            .unwrap()
396            .compile()
397            .unwrap();
398        let original = compiled.schema_set();
399        assert_eq!(original.xsd_version, crate::schema::model::XsdVersion::V1_1);
400
401        let hints = vec![SchemaLocationHint {
402            namespace: "urn:test".to_string(),
403            location: "nonexistent_42.xsd".to_string(),
404            base_uri: String::new(),
405        }];
406        let enriched = enrich_schema_set(original, &hints, &[])
407            .schema_set
408            .unwrap();
409        assert_eq!(
410            enriched.xsd_version,
411            crate::schema::model::XsdVersion::V1_1,
412            "enriched set should preserve XSD 1.1 version"
413        );
414
415        let _ = std::fs::remove_dir_all(&dir);
416    }
417
418    #[test]
419    fn test_enrich_schema_set_surfaces_compile_error() {
420        // Build an enriched set that cannot recompile: the hint adds a
421        // schema whose targetNamespace already exists in the original
422        // with a conflicting global element of the same name.
423        let dir = std::env::temp_dir().join("xsd_hint_test_compile_err");
424        let _ = std::fs::create_dir_all(&dir);
425        let primary = dir.join("primary.xsd");
426        std::fs::write(
427            &primary,
428            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
429                          targetNamespace="urn:test">
430            <xs:element name="root" type="xs:string"/>
431        </xs:schema>"#,
432        )
433        .unwrap();
434        let conflict = dir.join("conflict.xsd");
435        std::fs::write(
436            &conflict,
437            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
438                          targetNamespace="urn:test">
439            <xs:element name="root" type="xs:int"/>
440        </xs:schema>"#,
441        )
442        .unwrap();
443
444        let compiled = SchemaSetBuilder::new()
445            .add("urn:test", &primary.to_string_lossy())
446            .unwrap()
447            .compile()
448            .unwrap();
449        let original = compiled.schema_set();
450
451        let hints = vec![SchemaLocationHint {
452            namespace: "urn:test".to_string(),
453            location: conflict.to_string_lossy().into_owned(),
454            base_uri: String::new(),
455        }];
456        let outcome = enrich_schema_set(original, &hints, &[]);
457
458        // Either the recompile rejects the conflict (compile_error set),
459        // or the hint loader/dedup spots it (schema_set still None).
460        // What we are asserting: the failure is **not** silently swallowed.
461        assert!(
462            outcome.schema_set.is_none() || outcome.compile_error.is_none(),
463            "outcome must be internally consistent: {outcome:?}"
464        );
465        if outcome.schema_set.is_none() {
466            assert!(
467                outcome.compile_error.is_some() || !outcome.hint_errors.is_empty(),
468                "if no enriched set is produced, the failure reason must be \
469                 surfaced via compile_error or hint_errors, got: {outcome:?}"
470            );
471        }
472
473        let _ = std::fs::remove_dir_all(&dir);
474    }
475
476    #[test]
477    fn test_add_from_seeds_builder_with_loaded_locations() {
478        let dir = std::env::temp_dir().join("xsd_hint_test_add_from");
479        let _ = std::fs::create_dir_all(&dir);
480        let schema_path = dir.join("original.xsd");
481        std::fs::write(
482            &schema_path,
483            r#"<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
484            <xs:element name="root" type="xs:string"/>
485        </xs:schema>"#,
486        )
487        .unwrap();
488
489        let compiled = SchemaSetBuilder::new()
490            .add("", &schema_path.to_string_lossy())
491            .unwrap()
492            .compile()
493            .unwrap();
494
495        let mut builder = SchemaSetBuilder::new();
496        builder.add_from(compiled.schema_set());
497
498        // Verify the builder loaded the schema (has at least one document)
499        assert!(builder.schema_count() > 0, "add_from should load schemas");
500
501        let _ = std::fs::remove_dir_all(&dir);
502    }
503}