Skip to main content

zerodds_xml/
xsd_loader.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 ZeroDDS Contributors
3//! XTypes 1.3 Annex A — XSD-Schema-Loader fuer `<types>` XML (C4.5).
4//!
5//! Spec OMG XTypes 1.3 §7.3.2 + §7.3.3 + Annex A: ein XML-Type-Document
6//! (Annex-A-Schema) MUSS zur Laufzeit von einem URI-fähigen Loader
7//! aufnehmbar sein. Das ist Voraussetzung für `create_type_w_uri` und
8//! `create_type_w_document` (DynamicType-API, kommt mit C4.1).
9//!
10//! # Scope C4.5 (diese Stufe)
11//!
12//! - **URI-Loader**: `file://`, `data:` (RFC 2397), inline-Bytes via
13//!   `load_type_libraries_from_string`.
//! - **Strukturelle XSD-Annex-A-Validierung**: prüft die Element-/
//!   Attribut-Namen + Pflicht-Attribute pro Type-Konstrukt. Eine volle
//!   XSD-1.1-Engine (XPath, Schematron) ist nicht implementiert.
//! - **Spec-Namespace-Check**: `http://www.omg.org/spec/DDS-XML` als
//!   `targetNamespace`. Der Strict-Modus lehnt einen fehlenden Namespace ab.
19//! - **Re-Use C7.D**: liefert `Vec<TypeLibrary>` (XML-Datenmodell aus
20//!   `xtypes_def`); TypeObject-Bridge ist künftige Erweiterung (C4.5-b nach C4.1).
21//!
22//! # Bewusst nicht im Crate
23//!
24//! - Voller XSD-1.1-Validator (XPath, key/keyref, assertions).
25//! - HTTP/HTTPS-URI-Schemas — nur `file://` + `data:`.
26//! - XML-Catalog-Resolution.
27
28extern crate alloc;
29use alloc::string::String;
30use alloc::vec::Vec;
31
32use crate::errors::XmlError;
33use crate::xtypes_def::TypeLibrary;
34use crate::xtypes_parser::parse_type_libraries;
35
/// XML namespace of DDS-XML documents (XTypes Annex A, DDS-XML 1.0 §7.1.5).
pub const DDS_XML_NAMESPACE: &str = "http://www.omg.org/spec/DDS-XML";

/// Largest accepted `data:` URI payload in bytes (1 MiB); acts as a DoS cap.
pub const MAX_DATA_URI_BODY: usize = 1 << 20;

/// Largest accepted `file:` URI target in bytes (16 MiB); acts as a DoS cap.
pub const MAX_FILE_BYTES: usize = 16 << 20;
44
/// Selects how strictly an XML type document is checked before it is parsed.
///
/// The default is [`ValidationMode::Lax`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ValidationMode {
    /// Strict validation: the DDS-XML namespace is mandatory. Unknown
    /// elements are meant to be rejected as well; the loader functions in
    /// this module themselves only enforce the namespace check.
    Strict,
    /// Lax validation: the namespace is optional and unknown elements are
    /// ignored (intended to match the Cyclone DDS / Fast DDS behaviour).
    #[default]
    Lax,
}
61
/// Loads XML type libraries from the document a URI points to.
///
/// Supported schemes:
/// - `file:///path/to/file.xml` (or `file:relative/path.xml`)
/// - `data:application/xml,<inline XML>` (RFC 2397, plain text)
/// - `data:application/xml;base64,<base64>`
///
/// The fetched bytes must be valid UTF-8; the resulting text is handed to
/// [`load_type_libraries_from_string`] together with `mode`.
///
/// # Errors
/// Failures raised while fetching the URI and the UTF-8 check in this
/// function are reported as `XmlError::InvalidXml`. Errors from the strict
/// namespace check and from the XML parser are passed through unchanged.
#[cfg(feature = "std")]
pub fn load_type_libraries_from_uri(
    uri: &str,
    mode: ValidationMode,
) -> Result<Vec<TypeLibrary>, XmlError> {
    let raw = fetch_uri(uri)?;
    match std::str::from_utf8(&raw) {
        Ok(text) => load_type_libraries_from_string(text, mode),
        Err(_) => Err(XmlError::InvalidXml(
            "xsd_loader: URI body is not UTF-8".into(),
        )),
    }
}
84
85/// Lade XML-Type-Libraries direkt aus einem inline-String.
86///
87/// # Errors
88/// `XmlError::ParseError` bei XML-Decode-Fehler;
89/// `XmlError::Malformed` bei Strict-Mode + fehlendem Namespace.
90pub fn load_type_libraries_from_string(
91    xml: &str,
92    mode: ValidationMode,
93) -> Result<Vec<TypeLibrary>, XmlError> {
94    if mode == ValidationMode::Strict {
95        validate_namespace_strict(xml)?;
96    }
97    parse_type_libraries(xml)
98}
99
100/// Strikt-Modus-Check: das Root-Element MUSS einen
101/// `xmlns="http://www.omg.org/spec/DDS-XML"` oder einen Prefix-bound
102/// Spec-Namespace tragen.
103fn validate_namespace_strict(xml: &str) -> Result<(), XmlError> {
104    // Heuristischer Such-String. Ein voller XML-Parser-Pass kennt der
105    // Foundation-Layer schon — hier reicht eine Substring-Pruefung.
106    if !xml.contains(DDS_XML_NAMESPACE) {
107        return Err(XmlError::InvalidXml(alloc::format!(
108            "xsd_loader: strict mode verlangt xmlns=\"{DDS_XML_NAMESPACE}\""
109        )));
110    }
111    Ok(())
112}
113
/// Resolves `uri` to the raw bytes it refers to.
///
/// Supported schemes, matched case-insensitively (RFC 3986 §3.1):
/// - `file:` — with or without the `//` authority marker. An explicit
///   `localhost` authority is treated like an empty one (RFC 8089); any
///   other text after `//` is used as the path as-is.
/// - `data:` — RFC 2397.
///
/// # Errors
/// `XmlError::InvalidXml` for every other scheme; errors from the scheme
/// handlers are passed through unchanged.
#[cfg(feature = "std")]
fn fetch_uri(uri: &str) -> Result<Vec<u8>, XmlError> {
    // Strips `scheme` (including its ':') from the front of `uri`, ignoring
    // ASCII case. `get` also covers URIs shorter than the scheme and
    // non-char-boundary offsets.
    fn strip_scheme<'a>(uri: &'a str, scheme: &str) -> Option<&'a str> {
        let head = uri.get(..scheme.len())?;
        let tail = uri.get(scheme.len()..)?;
        head.eq_ignore_ascii_case(scheme).then_some(tail)
    }

    if let Some(rest) = strip_scheme(uri, "file:") {
        let path = match rest.strip_prefix("//") {
            // `file://localhost/abs/path` names the local path `/abs/path`.
            Some(after_slashes) => after_slashes
                .strip_prefix("localhost")
                .filter(|p| p.starts_with('/'))
                .unwrap_or(after_slashes),
            // RFC 8089 also allows `file:relative/path` (without `//`).
            None => rest,
        };
        fetch_file(path)
    } else if let Some(rest) = strip_scheme(uri, "data:") {
        fetch_data_uri(rest)
    } else {
        Err(XmlError::InvalidXml(alloc::format!(
            "xsd_loader: nicht unterstuetztes URI-Schema: {uri}"
        )))
    }
}
129
/// Reads the file at `path`, refusing anything larger than
/// [`MAX_FILE_BYTES`].
///
/// The size is checked twice: once against the file metadata, for a cheap
/// early rejection, and once while reading. The read itself is capped at
/// `MAX_FILE_BYTES + 1` bytes, so a file that grows after the metadata
/// check, or a special file whose metadata reports length 0, cannot make
/// the loader read unbounded data.
///
/// # Errors
/// `XmlError::InvalidXml` when the metadata cannot be read, the file
/// cannot be opened or read, or the size cap is exceeded.
#[cfg(feature = "std")]
fn fetch_file(path: &str) -> Result<Vec<u8>, XmlError> {
    let too_large =
        || XmlError::InvalidXml(alloc::format!("xsd_loader: file > {MAX_FILE_BYTES} byte"));
    let read_failed =
        |e: std::io::Error| XmlError::InvalidXml(alloc::format!("xsd_loader: file read: {e}"));

    // Compare in `u64`: casting the length to `usize` would truncate on
    // 32-bit targets and let huge files slip past the cap.
    let cap = u64::try_from(MAX_FILE_BYTES).unwrap_or(u64::MAX);

    let meta = std::fs::metadata(path)
        .map_err(|e| XmlError::InvalidXml(alloc::format!("xsd_loader: file metadata: {e}")))?;
    if meta.len() > cap {
        return Err(too_large());
    }

    let file = std::fs::File::open(path).map_err(read_failed)?;
    // One byte beyond the cap is enough to detect an oversized file.
    let mut limited = std::io::Read::take(file, cap.saturating_add(1));
    let mut contents = Vec::new();
    std::io::Read::read_to_end(&mut limited, &mut contents).map_err(read_failed)?;
    if contents.len() > MAX_FILE_BYTES {
        return Err(too_large());
    }
    Ok(contents)
}
142
/// Decodes the part of a `data:` URI that follows the scheme.
///
/// Layout per RFC 2397: `[<media type>][;base64],<data>`. Everything
/// before the first comma is metadata; the payload after it is
/// base64-decoded when one of the `;`-separated metadata fields is exactly
/// `base64`, and percent-decoded otherwise. The size cap applies to the
/// still-encoded payload.
///
/// # Errors
/// `XmlError::InvalidXml` when the comma separator is missing or the
/// payload exceeds [`MAX_DATA_URI_BODY`]; base64 decoding errors are
/// passed through.
#[cfg(feature = "std")]
fn fetch_data_uri(rest: &str) -> Result<Vec<u8>, XmlError> {
    let (header, body) = match rest.split_once(',') {
        Some(parts) => parts,
        None => {
            return Err(XmlError::InvalidXml(
                "xsd_loader: data: URI ohne Komma-Separator".into(),
            ));
        }
    };
    if body.len() > MAX_DATA_URI_BODY {
        return Err(XmlError::InvalidXml(alloc::format!(
            "xsd_loader: data: body > {MAX_DATA_URI_BODY} byte"
        )));
    }
    let is_base64 = header.split(';').any(|field| field == "base64");
    if is_base64 {
        return decode_base64(body);
    }
    // Plain text: percent-escapes are decoded, and unescaped content is
    // tolerated as well (convenient for tests).
    Ok(percent_decode(body).into_bytes())
}
164
/// Decodes standard-alphabet Base64 (RFC 4648) without an external crate.
///
/// Surrounding whitespace is trimmed, and ASCII space, tab, CR and LF are
/// skipped anywhere in the input. Padding (`=`) is optional, but once it
/// has been seen no further alphabet characters may follow.
///
/// # Errors
/// `XmlError::InvalidXml` when the input contains a character outside the
/// alphabet, carries data after the padding, or ends with a single
/// dangling character (which cannot encode a full byte and indicates
/// truncated input).
#[cfg(feature = "std")]
fn decode_base64(s: &str) -> Result<Vec<u8>, XmlError> {
    let bytes = s.trim().as_bytes();
    let mut out = Vec::with_capacity(bytes.len() * 3 / 4);
    // `buf` collects sextets; `bits` counts how many low bits of `buf` are
    // still waiting to be emitted.
    let mut buf: u32 = 0;
    let mut bits: u32 = 0;
    let mut padding_seen = false;
    for &b in bytes {
        let v: u8 = match b {
            b'A'..=b'Z' => b - b'A',
            b'a'..=b'z' => b - b'a' + 26,
            b'0'..=b'9' => b - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            b'=' => {
                padding_seen = true;
                continue;
            }
            b' ' | b'\n' | b'\r' | b'\t' => continue,
            _ => {
                return Err(XmlError::InvalidXml(
                    "xsd_loader: ungueltiges Base64-Zeichen".into(),
                ));
            }
        };
        if padding_seen {
            // Padding terminates the data; anything after it is corruption.
            return Err(XmlError::InvalidXml(
                "xsd_loader: base64 data after padding".into(),
            ));
        }
        buf = (buf << 6) | u32::from(v);
        bits += 6;
        if bits >= 8 {
            bits -= 8;
            out.push(((buf >> bits) & 0xFF) as u8);
            // Keep only the bits that have not been emitted yet.
            buf &= (1 << bits) - 1;
        }
    }
    if bits >= 6 {
        // A lone trailing sextet means the input length is 1 (mod 4).
        return Err(XmlError::InvalidXml(
            "xsd_loader: truncated base64 input".into(),
        ));
    }
    Ok(out)
}
197
/// Minimal percent-decoder for `data:` URI payloads.
///
/// Every `%XY` with two hexadecimal digits becomes the byte `0xXY`. A `%`
/// that is not followed by two hex digits is copied through unchanged, as
/// is every other byte. The decoded bytes are converted to a `String`
/// lossily, so invalid UTF-8 sequences turn into U+FFFD.
#[cfg(feature = "std")]
fn percent_decode(s: &str) -> String {
    let mut remaining = s.as_bytes();
    let mut decoded = Vec::with_capacity(remaining.len());
    while let Some((&first, tail)) = remaining.split_first() {
        if first == b'%' {
            if let [h, l, after @ ..] = tail {
                if let (Some(hi), Some(lo)) = (hex(*h), hex(*l)) {
                    decoded.push((hi << 4) | lo);
                    remaining = after;
                    continue;
                }
            }
        }
        decoded.push(first);
        remaining = tail;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
219
220#[cfg(feature = "std")]
/// Returns the value (0–15) of the ASCII hexadecimal digit `b`, accepting
/// both upper- and lower-case letters, or `None` for any other byte.
fn hex(b: u8) -> Option<u8> {
    char::from(b)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
229
// Unit tests for the loader. `unwrap`/`expect`/`panic` are allowed here
// because a panic is exactly how a failing test reports its result.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    /// Minimal type document that declares the DDS-XML namespace.
    const SAMPLE_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<types xmlns="http://www.omg.org/spec/DDS-XML">
  <struct name="Position">
    <member name="x" type="float"/>
    <member name="y" type="float"/>
  </struct>
</types>
"#;

    /// Same kind of document, but without any namespace declaration.
    const SAMPLE_XML_NO_NS: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<types>
  <struct name="Position">
    <member name="x" type="float"/>
  </struct>
</types>
"#;

    #[test]
    fn lax_mode_accepts_xml_without_namespace() {
        let libs = load_type_libraries_from_string(SAMPLE_XML_NO_NS, ValidationMode::Lax)
            .expect("lax should accept");
        assert!(!libs.is_empty());
    }

    #[test]
    fn strict_mode_rejects_xml_without_namespace() {
        let err =
            load_type_libraries_from_string(SAMPLE_XML_NO_NS, ValidationMode::Strict).unwrap_err();
        assert!(matches!(err, XmlError::InvalidXml(_)));
    }

    #[test]
    fn strict_mode_accepts_xml_with_correct_namespace() {
        let libs = load_type_libraries_from_string(SAMPLE_XML, ValidationMode::Strict)
            .expect("strict + correct ns should accept");
        assert!(!libs.is_empty());
    }

    #[test]
    fn dds_xml_namespace_constant_matches_spec() {
        assert_eq!(DDS_XML_NAMESPACE, "http://www.omg.org/spec/DDS-XML");
    }

    #[test]
    fn validation_mode_default_is_lax() {
        assert_eq!(ValidationMode::default(), ValidationMode::Lax);
    }

    // ---- URI loader tests ----

    #[cfg(feature = "std")]
    #[test]
    fn data_uri_plain_loads() {
        let uri = format!("data:application/xml,{SAMPLE_XML}");
        let libs = load_type_libraries_from_uri(&uri, ValidationMode::Lax).unwrap();
        assert!(!libs.is_empty());
    }

    #[cfg(feature = "std")]
    #[test]
    fn data_uri_percent_encoded_loads() {
        // Angle brackets escaped as %3C / %3E must decode back to the
        // original document.
        let encoded = SAMPLE_XML.replace('<', "%3C").replace('>', "%3E");
        let uri = format!("data:application/xml,{encoded}");
        let libs = load_type_libraries_from_uri(&uri, ValidationMode::Lax).unwrap();
        assert!(!libs.is_empty());
    }

    #[cfg(feature = "std")]
    #[test]
    fn data_uri_base64_loads() {
        let b64 = encode_base64_for_test(SAMPLE_XML.as_bytes());
        let uri = format!("data:application/xml;base64,{b64}");
        let libs = load_type_libraries_from_uri(&uri, ValidationMode::Lax).unwrap();
        assert!(!libs.is_empty());
    }

    #[cfg(feature = "std")]
    #[test]
    fn data_uri_base64_with_invalid_character_rejected() {
        let err = load_type_libraries_from_uri(
            "data:application/xml;base64,@@@@",
            ValidationMode::Lax,
        )
        .unwrap_err();
        assert!(matches!(err, XmlError::InvalidXml(_)));
    }

    #[cfg(feature = "std")]
    #[test]
    fn data_uri_without_comma_rejected() {
        let err = load_type_libraries_from_uri("data:no-comma", ValidationMode::Lax).unwrap_err();
        assert!(matches!(err, XmlError::InvalidXml(_)));
    }

    #[cfg(feature = "std")]
    #[test]
    fn unsupported_uri_scheme_rejected() {
        let err =
            load_type_libraries_from_uri("https://example.com/types.xml", ValidationMode::Lax)
                .unwrap_err();
        assert!(matches!(err, XmlError::InvalidXml(_)));
    }

    #[cfg(feature = "std")]
    #[test]
    fn file_uri_with_nonexistent_path_rejected() {
        let err = load_type_libraries_from_uri("file:///does/not/exist.xml", ValidationMode::Lax)
            .unwrap_err();
        assert!(matches!(err, XmlError::InvalidXml(_)));
    }

    #[cfg(feature = "std")]
    #[test]
    fn file_uri_loads_existing_file() {
        // The process id keeps concurrently running test binaries from
        // clobbering each other's fixture file.
        let mut path = std::env::temp_dir();
        path.push(format!(
            "zerodds_xsd_loader_test_{}.xml",
            std::process::id()
        ));
        std::fs::write(&path, SAMPLE_XML).unwrap();
        let uri = format!("file://{}", path.display());
        let result = load_type_libraries_from_uri(&uri, ValidationMode::Lax);
        // Clean up before asserting so a failing load does not leak the file.
        std::fs::remove_file(&path).ok();
        let libs = result.unwrap();
        assert!(!libs.is_empty());
    }

    #[cfg(feature = "std")]
    #[test]
    fn data_uri_too_large_rejected() {
        let big = "a".repeat(MAX_DATA_URI_BODY + 1);
        let uri = format!("data:application/xml,{big}");
        let err = load_type_libraries_from_uri(&uri, ValidationMode::Lax).unwrap_err();
        assert!(matches!(err, XmlError::InvalidXml(_)));
    }

    // ---- Helpers ----

    /// Reference Base64 encoder (standard alphabet, with `=` padding) used
    /// to build inputs for the decoder under test.
    #[cfg(feature = "std")]
    fn encode_base64_for_test(input: &[u8]) -> String {
        const ALPHA: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let mut out = String::with_capacity(input.len().div_ceil(3) * 4);
        for chunk in input.chunks(3) {
            let b0 = chunk[0];
            // Missing bytes of a short final chunk count as zero bits.
            let b1 = chunk.get(1).copied().unwrap_or(0);
            let b2 = chunk.get(2).copied().unwrap_or(0);
            out.push(ALPHA[(b0 >> 2) as usize] as char);
            out.push(ALPHA[(((b0 & 0x03) << 4) | (b1 >> 4)) as usize] as char);
            if chunk.len() > 1 {
                out.push(ALPHA[(((b1 & 0x0F) << 2) | (b2 >> 6)) as usize] as char);
            } else {
                out.push('=');
            }
            if chunk.len() > 2 {
                out.push(ALPHA[(b2 & 0x3F) as usize] as char);
            } else {
                out.push('=');
            }
        }
        out
    }
}