hedl_xml/
lib.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! HEDL XML Conversion
19//!
20//! Provides bidirectional conversion between HEDL documents and XML format.
21//!
22//! # Features
23//!
24//! - Convert HEDL documents to well-formed XML
25//! - Parse XML into HEDL documents with type inference
26//! - **Streaming support** for large multi-gigabyte XML files
27//! - **Async I/O** with Tokio (via `async` feature flag)
28//! - **XSD schema validation** with comprehensive error messages
29//! - **Schema caching** for high-performance validation
30//! - Configurable output formatting (pretty print, attributes)
31//! - Support for nested structures and matrix lists
32//! - Reference and expression preservation
33//!
34//! # Examples
35//!
36//! ## Converting HEDL to XML
37//!
38//! ```rust
39//! use hedl_core::{Document, Item, Value};
40//! use hedl_xml::{to_xml, ToXmlConfig};
41//! use std::collections::BTreeMap;
42//!
43//! let mut doc = Document::new((1, 0));
44//! doc.root.insert("name".to_string(), Item::Scalar(Value::String("example".to_string())));
45//!
46//! let config = ToXmlConfig::default();
47//! let xml = to_xml(&doc, &config).unwrap();
48//! ```
49//!
50//! ## Converting XML to HEDL
51//!
52//! ```rust
53//! use hedl_xml::{from_xml, FromXmlConfig};
54//!
55//! let xml = r#"<?xml version="1.0"?><hedl><name>example</name></hedl>"#;
56//! let config = FromXmlConfig::default();
57//! let doc = from_xml(xml, &config).unwrap();
58//! ```
59//!
60//! ## Streaming large XML files
61//!
62//! For multi-gigabyte XML files, use the streaming API to process items incrementally
63//! without loading the entire document into memory:
64//!
65//! ```rust,no_run
66//! use hedl_xml::streaming::{from_xml_stream, StreamConfig};
67//! use std::fs::File;
68//!
69//! let file = File::open("large.xml")?;
70//! let config = StreamConfig::default();
71//!
72//! for result in from_xml_stream(file, &config)? {
73//!     match result {
74//!         Ok(item) => println!("Processing: {}", item.key),
75//!         Err(e) => eprintln!("Error: {}", e),
76//!     }
77//! }
78//! # Ok::<(), Box<dyn std::error::Error>>(())
79//! ```
80//!
81//! ## XSD Schema Validation
82//!
83//! Validate XML documents against XSD schemas:
84//!
85//! ```rust
86//! use hedl_xml::schema::SchemaValidator;
87//!
88//! let schema = r#"<?xml version="1.0"?>
89//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
90//!   <xs:element name="person">
91//!     <xs:complexType>
92//!       <xs:sequence>
93//!         <xs:element name="name" type="xs:string"/>
94//!         <xs:element name="age" type="xs:integer"/>
95//!       </xs:sequence>
96//!     </xs:complexType>
97//!   </xs:element>
98//! </xs:schema>"#;
99//!
100//! let validator = SchemaValidator::from_xsd(schema)?;
101//!
102//! let xml = r#"<?xml version="1.0"?>
103//! <person>
104//!   <name>Alice</name>
105//!   <age>30</age>
106//! </person>"#;
107//!
108//! validator.validate(xml)?;
109//! # Ok::<(), Box<dyn std::error::Error>>(())
110//! ```
111//!
112//! ## Async I/O (with `async` feature)
113//!
114//! Enable async support in `Cargo.toml`:
115//!
116//! ```toml
117//! [dependencies]
118//! hedl-xml = { version = "*", features = ["async"] }
119//! tokio = { version = "1", features = ["full"] }
120//! ```
121//!
122//! Then use async functions:
123//!
124//! ```rust,no_run
125//! # #[cfg(feature = "async")]
126//! # {
127//! use hedl_xml::async_api::{from_xml_file_async, to_xml_file_async};
128//! use hedl_xml::{FromXmlConfig, ToXmlConfig};
129//!
130//! # #[tokio::main]
131//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
132//! // Read XML asynchronously
133//! let doc = from_xml_file_async("input.xml", &FromXmlConfig::default()).await?;
134//!
135//! // Process document...
136//!
137//! // Write XML asynchronously
138//! to_xml_file_async(&doc, "output.xml", &ToXmlConfig::default()).await?;
139//! # Ok(())
140//! # }
141//! # }
142//! ```
143
144mod from_xml;
145mod to_xml;
146pub mod streaming;
147pub mod schema;
148
149#[cfg(feature = "async")]
150pub mod async_api;
151
152pub use from_xml::{from_xml, FromXmlConfig};
153pub use to_xml::{to_xml, ToXmlConfig};
154pub use streaming::{from_xml_stream, StreamConfig, StreamItem, XmlStreamingParser};
155pub use schema::{SchemaValidator, SchemaCache, ValidationError};
156
157use hedl_core::Document;
158
159/// Convert HEDL document to XML string with default configuration
160pub fn hedl_to_xml(doc: &Document) -> Result<String, String> {
161    to_xml(doc, &ToXmlConfig::default())
162}
163
164/// Convert XML string to HEDL document with default configuration
165pub fn xml_to_hedl(xml: &str) -> Result<Document, String> {
166    from_xml(xml, &FromXmlConfig::default())
167}
168
#[cfg(test)]
mod tests {
    use super::*;
    use hedl_core::{Document, Item, MatrixList, Node, Reference, Value};
    use std::collections::BTreeMap;

    /// Creates a version (1, 0) document and inserts `entries` into its root, in order.
    fn doc_with(entries: Vec<(&str, Item)>) -> Document {
        let mut doc = Document::new((1, 0));
        for (key, item) in entries {
            doc.root.insert(key.to_string(), item);
        }
        doc
    }

    /// Wraps `entries` in an `Item::Object` backed by a `BTreeMap`.
    fn object_of(entries: Vec<(&str, Item)>) -> Item {
        let fields: BTreeMap<String, Item> = entries
            .into_iter()
            .map(|(key, item)| (key.to_string(), item))
            .collect();
        Item::Object(fields)
    }

    /// Shorthand for a scalar string item.
    fn text(value: &str) -> Item {
        Item::Scalar(Value::String(value.to_string()))
    }

    /// Serializes `doc` with `hedl_to_xml` and parses the result back with `xml_to_hedl`.
    fn round_trip(doc: &Document) -> Document {
        let xml = hedl_to_xml(doc).unwrap();
        xml_to_hedl(&xml).unwrap()
    }

    /// Looks up `key` in the document root and applies `Item::as_scalar` to the hit.
    fn root_scalar<'a>(doc: &'a Document, key: &str) -> Option<&'a Value> {
        doc.root.get(key).and_then(|item| item.as_scalar())
    }

    /// Bool, int and string scalars come back unchanged after a round trip.
    /// The null and float entries are written but not asserted on.
    #[test]
    fn test_round_trip_scalars() {
        let source = doc_with(vec![
            ("null_val", Item::Scalar(Value::Null)),
            ("bool_val", Item::Scalar(Value::Bool(true))),
            ("int_val", Item::Scalar(Value::Int(42))),
            ("float_val", Item::Scalar(Value::Float(3.25))),
            ("string_val", text("hello")),
        ]);

        let restored = round_trip(&source);

        assert_eq!(root_scalar(&restored, "bool_val"), Some(&Value::Bool(true)));
        assert_eq!(root_scalar(&restored, "int_val"), Some(&Value::Int(42)));
        assert_eq!(
            root_scalar(&restored, "string_val"),
            Some(&Value::String("hello".to_string()))
        );
    }

    /// An object with two scalar fields keeps both fields after a round trip.
    #[test]
    fn test_round_trip_object() {
        let settings = object_of(vec![
            ("name", text("test")),
            ("value", Item::Scalar(Value::Int(100))),
        ]);
        let source = doc_with(vec![("config", settings)]);

        let restored = round_trip(&source);

        let config_obj = restored
            .root
            .get("config")
            .and_then(|item| item.as_object())
            .unwrap();
        let name_field = config_obj.get("name").and_then(|item| item.as_scalar());
        let value_field = config_obj.get("value").and_then(|item| item.as_scalar());

        assert_eq!(name_field, Some(&Value::String("test".to_string())));
        assert_eq!(value_field, Some(&Value::Int(100)));
    }

    /// Local and qualified references are preserved by a round trip.
    #[test]
    fn test_round_trip_reference() {
        let source = doc_with(vec![
            (
                "ref1",
                Item::Scalar(Value::Reference(Reference::local("user123"))),
            ),
            (
                "ref2",
                Item::Scalar(Value::Reference(Reference::qualified("User", "456"))),
            ),
        ]);

        let restored = round_trip(&source);

        assert_eq!(
            root_scalar(&restored, "ref1"),
            Some(&Value::Reference(Reference::local("user123")))
        );
        assert_eq!(
            root_scalar(&restored, "ref2"),
            Some(&Value::Reference(Reference::qualified("User", "456")))
        );
    }

    /// A call expression `add(x, 1)` is preserved by a round trip.
    #[test]
    fn test_round_trip_expression() {
        use hedl_core::lex::{ExprLiteral, Expression, Span};

        let lhs = Expression::Identifier {
            name: "x".to_string(),
            span: Span::default(),
        };
        let rhs = Expression::Literal {
            value: ExprLiteral::Int(1),
            span: Span::default(),
        };
        let call = Expression::Call {
            name: "add".to_string(),
            args: vec![lhs, rhs],
            span: Span::default(),
        };
        let source = doc_with(vec![(
            "expr",
            Item::Scalar(Value::Expression(call.clone())),
        )]);

        let restored = round_trip(&source);

        assert_eq!(
            root_scalar(&restored, "expr"),
            Some(&Value::Expression(call))
        );
    }

    /// A two-row matrix list is serialized with its key and both row ids present.
    #[test]
    fn test_matrix_list() {
        let mut users = MatrixList::new("User", vec!["id".to_string(), "name".to_string()]);
        for &(id, name) in &[("user1", "Alice"), ("user2", "Bob")] {
            let row = Node::new(
                "User",
                id,
                vec![
                    Value::String(id.to_string()),
                    Value::String(name.to_string()),
                ],
            );
            users.add_row(row);
        }
        let doc = doc_with(vec![("users", Item::List(users))]);

        let xml = hedl_to_xml(&doc).unwrap();

        for needle in &["<users", "user1", "user2"] {
            assert!(xml.contains(needle));
        }
    }

    /// Strings containing `&`, `<`, `>` and `"` survive a round trip unchanged.
    #[test]
    fn test_special_characters_escaping() {
        let source = doc_with(vec![("text", text("hello & goodbye <tag> \"quoted\""))]);

        let restored = round_trip(&source);

        // Escaping on write and unescaping on read must cancel out.
        assert_eq!(
            root_scalar(&source, "text"),
            root_scalar(&restored, "text")
        );
    }

    /// A two-level nested object still exposes its outer key after a round trip.
    #[test]
    fn test_nested_objects() {
        let innermost = object_of(vec![("deep", text("value"))]);
        let middle = object_of(vec![("nested", innermost)]);
        let source = doc_with(vec![("outer", middle)]);

        let restored = round_trip(&source);

        assert!(restored.root.contains_key("outer"));
    }

    /// Pretty-printed output is longer than compact output for the same document.
    #[test]
    fn test_config_pretty_print() {
        let doc = doc_with(vec![("test", text("value"))]);

        let pretty_cfg = ToXmlConfig {
            pretty: true,
            indent: "  ".to_string(),
            ..Default::default()
        };
        let compact_cfg = ToXmlConfig {
            pretty: false,
            ..Default::default()
        };

        let pretty_len = to_xml(&doc, &pretty_cfg).unwrap().len();
        let compact_len = to_xml(&doc, &compact_cfg).unwrap().len();

        // Only the length is compared; the added newlines/indentation are what
        // is expected to make the pretty form longer.
        assert!(pretty_len > compact_len);
    }

    /// A custom root element name appears as both the opening and closing tag.
    #[test]
    fn test_config_custom_root() {
        let empty = Document::new((1, 0));
        let config = ToXmlConfig {
            root_element: "custom_root".to_string(),
            ..Default::default()
        };

        let xml = to_xml(&empty, &config).unwrap();

        assert!(xml.contains("<custom_root"));
        assert!(xml.contains("</custom_root>"));
    }

    /// With metadata enabled, the document version is emitted as `version="2.1"`.
    #[test]
    fn test_config_metadata() {
        let versioned = Document::new((2, 1));
        let config = ToXmlConfig {
            include_metadata: true,
            ..Default::default()
        };

        let xml = to_xml(&versioned, &config).unwrap();

        assert!(xml.contains("version=\"2.1\""));
    }

    /// A null scalar keeps its key after a round trip.
    #[test]
    fn test_empty_values() {
        let source = doc_with(vec![("empty", Item::Scalar(Value::Null))]);

        let restored = round_trip(&source);

        assert!(restored.root.contains_key("empty"));
    }

    /// A one-dimensional tensor is written as a `<tensor>` element containing `<item>` children.
    #[test]
    fn test_tensor_values() {
        use hedl_core::lex::Tensor;

        let elements = vec![1.0, 2.0, 3.0]
            .into_iter()
            .map(Tensor::Scalar)
            .collect();
        let doc = doc_with(vec![(
            "tensor",
            Item::Scalar(Value::Tensor(Tensor::Array(elements))),
        )]);

        let xml = hedl_to_xml(&doc).unwrap();

        assert!(xml.contains("<tensor>"));
        assert!(xml.contains("<item>"));
    }

    /// With `infer_lists` enabled, repeated `<user>` elements are stored under the `user` key.
    #[test]
    fn test_infer_lists_config() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <hedl>
            <user id="1"><name>Alice</name></user>
            <user id="2"><name>Bob</name></user>
        </hedl>"#;
        let config = FromXmlConfig {
            infer_lists: true,
            ..Default::default()
        };

        let parsed = from_xml(xml, &config).unwrap();

        assert!(parsed.root.contains_key("user"));
        // NOTE(review): the row count is only checked when the item is a list;
        // any other item kind passes this test silently.
        if let Some(Item::List(users)) = parsed.root.get("user") {
            assert_eq!(users.rows.len(), 2);
        }
    }

    /// XML attributes become typed scalar fields of the element's object.
    #[test]
    fn test_attributes_as_values() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <hedl>
            <item id="123" name="test" active="true"/>
        </hedl>"#;
        let config = FromXmlConfig::default();

        let parsed = from_xml(xml, &config).unwrap();

        assert!(parsed.root.contains_key("item"));
        // NOTE(review): the field checks only run when the item is an object;
        // any other item kind passes this test silently.
        if let Some(Item::Object(fields)) = parsed.root.get("item") {
            let field = |key: &str| fields.get(key).and_then(|item| item.as_scalar());

            // "123" is expected to be inferred as an integer, not kept as a string.
            assert_eq!(field("id"), Some(&Value::Int(123)));
            assert_eq!(field("name"), Some(&Value::String("test".to_string())));
            assert_eq!(field("active"), Some(&Value::Bool(true)));
        }
    }
}