Skip to main content

openscenario_rs/parser/
xml.rs

1//! XML parsing implementation using quick-xml and serde
2//!
3//! This module provides efficient XML parsing and serialization for OpenSCENARIO documents
4//! with comprehensive error handling and validation capabilities.
5//!
6//! # Features
7//!
8//! - **High-performance parsing** using quick-xml with zero-copy deserialization
9//! - **Comprehensive validation** with detailed error reporting and suggestions
10//! - **Catalog support** for reusable component libraries
11//! - **UTF-8 BOM handling** for cross-platform compatibility
12//! - **Pretty-printed output** with configurable formatting
13//!
14//! # Basic Usage
15//!
16//! ## Parsing Scenarios
17//!
18//! ```rust,no_run
19//! use openscenario_rs::{parse_from_file, parse_from_str};
20//!
21//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! // Parse from file with automatic error context
23//! let scenario = parse_from_file("my_scenario.xosc")?;
24//! println!("Scenario author: {}", scenario.file_header.author);
25//!
26//! // Parse from XML string
27//! let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
28//! <OpenSCENARIO>
29//!   <FileHeader revMajor="1" revMinor="3" date="2024-01-01T00:00:00"
30//!               author="Example" description="Test scenario"/>
31//!   <ScenarioDefinition>
32//!     <!-- scenario content -->
33//!   </ScenarioDefinition>
34//! </OpenSCENARIO>"#;
35//! let scenario = parse_from_str(xml)?;
36//! # Ok(())
37//! # }
38//! ```
39//!
40//! ## Serialization
41//!
42//! ```rust,no_run
43//! use openscenario_rs::{serialize_to_string, serialize_to_file};
44//!
45//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
46//! # let scenario = openscenario_rs::parse_from_file("scenario.xosc")?;
47//! // Serialize to formatted XML string
48//! let xml_output = serialize_to_string(&scenario)?;
49//! println!("{}", xml_output);
50//!
51//! // Write directly to file
52//! serialize_to_file(&scenario, "output.xosc")?;
53//! # Ok(())
54//! # }
55//! ```
56//!
57//! # Catalog File Operations
58//!
59//! ```rust,no_run
60//! use openscenario_rs::parser::xml::{
61//!     parse_catalog_from_file, serialize_catalog_to_file,
62//!     parse_catalog_from_str_validated
63//! };
64//!
65//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
66//! // Parse vehicle catalog
67//! let catalog = parse_catalog_from_file("vehicles.xosc")?;
68//!
69//! // Validate catalog structure
70//! let catalog_xml = openscenario_rs::serialize_catalog_to_string(&catalog)?;
71//! let validated_catalog = parse_catalog_from_str_validated(&catalog_xml)?;
72//!
73//! // Export modified catalog
74//! serialize_catalog_to_file(&catalog, "updated_vehicles.xosc")?;
75//! # Ok(())
76//! # }
77//! ```
78//!
79//! # Error Handling
80//!
81//! All parsing functions return `Result<T>` with detailed error context:
82//!
83//! ```rust,no_run
84//! # use openscenario_rs::parser::xml::parse_from_file;
85//! match parse_from_file("scenario.xosc") {
86//!     Ok(scenario) => {
87//!         // Process valid scenario
88//!         println!("Loaded scenario with {} entities",
89//!                  scenario.entities.as_ref().map_or(0, |e| e.scenario_objects.len()));
90//!     }
91//!     Err(e) => {
92//!         eprintln!("Parse error: {}", e);
93//!         // Error includes file path and specific parsing context
94//!     }
95//! }
96//! ```
97//!
98//! # Performance Notes
99//!
//! - Use `parse_from_file` for fastest parsing without validation
//! - Use `parse_from_file_validated` when you need structure validation
//! - The file-based parsers reject files larger than 100 MB; for very large files (>50MB), consider chunked processing
//! - Validation adds ~10-15% overhead but catches malformed XML early
104
105use crate::error::{Error, Result};
106use crate::types::catalogs::files::CatalogFile;
107use crate::types::scenario::storyboard::OpenScenario;
108use markup_fmt::{config::FormatOptions, format_text, Language};
109use std::fs;
110use std::path::Path;
111
/// Upper bound, in bytes, on the size of a file the file-based parsers accept (100 MiB).
const MAX_FILE_SIZE: u64 = 100 << 20;
114
/// Strip a single leading byte order mark (U+FEFF) from `content`, if present.
///
/// The returned slice borrows from `content`; when there is no leading BOM
/// the input is returned unchanged. Only the first BOM is removed.
fn remove_bom(content: &str) -> &str {
    // In UTF-8 the BOM is the byte sequence EF BB BF, which decodes to U+FEFF.
    content.strip_prefix('\u{FEFF}').unwrap_or(content)
}
125
126/// Internal helper to parse OpenSCENARIO from file
127fn parse_from_file_internal<P: AsRef<Path>>(path: P, validate_xml: bool) -> Result<OpenScenario> {
128    let metadata = fs::metadata(&path).map_err(Error::from).map_err(|e| {
129        e.with_context(&format!(
130            "Failed to read file metadata: {}",
131            path.as_ref().display()
132        ))
133    })?;
134
135    if metadata.len() > MAX_FILE_SIZE {
136        return Err(Error::out_of_range(
137            "file_size",
138            &metadata.len().to_string(),
139            "0",
140            &MAX_FILE_SIZE.to_string(),
141        ));
142    }
143
144    let xml_content = fs::read_to_string(&path)
145        .map_err(Error::from)
146        .map_err(|e| {
147            e.with_context(&format!("Failed to read file: {}", path.as_ref().display()))
148        })?;
149
150    let cleaned_content = remove_bom(&xml_content);
151
152    if validate_xml {
153        validate_xml_structure(cleaned_content).map_err(|e| {
154            e.with_context(&format!(
155                "XML validation failed for file: {}",
156                path.as_ref().display()
157            ))
158        })?;
159    }
160
161    parse_from_str(cleaned_content).map_err(|e| {
162        e.with_context(&format!(
163            "Failed to parse file: {}",
164            path.as_ref().display()
165        ))
166    })
167}
168
169/// Internal helper to parse catalog from file
170fn parse_catalog_from_file_internal<P: AsRef<Path>>(
171    path: P,
172    validate_xml: bool,
173) -> Result<CatalogFile> {
174    let metadata = fs::metadata(&path).map_err(Error::from).map_err(|e| {
175        e.with_context(&format!(
176            "Failed to read catalog file metadata: {}",
177            path.as_ref().display()
178        ))
179    })?;
180
181    if metadata.len() > MAX_FILE_SIZE {
182        return Err(Error::out_of_range(
183            "file_size",
184            &metadata.len().to_string(),
185            "0",
186            &MAX_FILE_SIZE.to_string(),
187        ));
188    }
189
190    let xml_content = fs::read_to_string(&path)
191        .map_err(Error::from)
192        .map_err(|e| {
193            e.with_context(&format!(
194                "Failed to read catalog file: {}",
195                path.as_ref().display()
196            ))
197        })?;
198
199    let cleaned_content = remove_bom(&xml_content);
200
201    if validate_xml {
202        validate_catalog_xml_structure(cleaned_content).map_err(|e| {
203            e.with_context(&format!(
204                "XML validation failed for catalog file: {}",
205                path.as_ref().display()
206            ))
207        })?;
208    }
209
210    parse_catalog_from_str(cleaned_content).map_err(|e| {
211        e.with_context(&format!(
212            "Failed to parse catalog file: {}",
213            path.as_ref().display()
214        ))
215    })
216}
217
218/// Parse an OpenSCENARIO document from a string
219///
220/// This function uses quick-xml's serde integration to deserialize
221/// XML into our Rust type system.
222#[must_use = "parsing result should be handled"]
223pub fn parse_from_str(xml: &str) -> Result<OpenScenario> {
224    quick_xml::de::from_str(xml)
225        .map_err(Error::from)
226        .map_err(|e| e.with_context("Failed to parse OpenSCENARIO XML"))
227}
228
229/// Parse an OpenSCENARIO document from a file
230///
231/// Reads file into memory and then parses it as a string.
232#[must_use = "parsing result should be handled"]
233pub fn parse_from_file<P: AsRef<Path>>(path: P) -> Result<OpenScenario> {
234    parse_from_file_internal(path, false)
235}
236
237/// Serialize an OpenSCENARIO document to XML string
238///
239/// This function uses quick-xml's serde integration to serialize
240/// our Rust types back to XML format.
241#[must_use = "serialization result should be handled"]
242pub fn serialize_to_string(scenario: &OpenScenario) -> Result<String> {
243    let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
244    xml.push('\n');
245
246    let serialized = quick_xml::se::to_string(scenario)
247        .map_err(Error::XmlSerializeError)
248        .map_err(|e| e.with_context("Failed to serialize OpenSCENARIO to XML"))?;
249    let s = format_text(
250        &serialized,
251        Language::Xml,
252        &FormatOptions::default(),
253        |serialized, _| Ok::<_, std::convert::Infallible>(serialized.into()),
254    )
255    .unwrap();
256    xml.push_str(&s);
257    Ok(xml)
258}
259
260/// Serialize an OpenSCENARIO document to a file
261///
262/// Serializes the scenario to XML and writes it to the specified file.
263#[must_use = "serialization result should be handled"]
264pub fn serialize_to_file<P: AsRef<Path>>(scenario: &OpenScenario, path: P) -> Result<()> {
265    let xml = serialize_to_string(scenario)?;
266
267    fs::write(&path, xml).map_err(Error::from).map_err(|e| {
268        e.with_context(&format!(
269            "Failed to write file: {}",
270            path.as_ref().display()
271        ))
272    })
273}
274
275/// Validate XML structure before parsing
276///
277/// This function performs basic XML structure validation to provide
278/// better error messages for malformed documents.
279pub fn validate_xml_structure(xml: &str) -> Result<()> {
280    // Basic validation - check for XML declaration and root element
281    let trimmed = xml.trim();
282
283    if trimmed.is_empty() {
284        return Err(Error::invalid_xml("XML document is empty"));
285    }
286
287    if !trimmed.starts_with("<?xml") && !trimmed.starts_with('<') {
288        return Err(Error::invalid_xml(
289            "XML document must start with XML declaration or root element",
290        ));
291    }
292
293    if !trimmed.contains("OpenSCENARIO") {
294        return Err(Error::invalid_xml(
295            "Document does not appear to contain OpenSCENARIO root element",
296        ));
297    }
298
299    Ok(())
300}
301
302/// Parse with validation
303///
304/// Validates the XML structure before attempting to parse it.
305#[must_use = "parsing result should be handled"]
306pub fn parse_from_str_validated(xml: &str) -> Result<OpenScenario> {
307    validate_xml_structure(xml)?;
308    parse_from_str(xml)
309}
310
311/// Parse file with validation
312///
313/// Validates the XML structure before attempting to parse it.
314#[must_use = "parsing result should be handled"]
315pub fn parse_from_file_validated<P: AsRef<Path>>(path: P) -> Result<OpenScenario> {
316    parse_from_file_internal(path, true)
317}
318
319// Catalog parsing functions
320
321/// Parse a catalog file from XML string
322///
323/// This function uses quick-xml's serde integration to deserialize
324/// catalog XML into our catalog file structure.
325#[must_use = "parsing result should be handled"]
326pub fn parse_catalog_from_str(xml: &str) -> Result<CatalogFile> {
327    quick_xml::de::from_str(xml)
328        .map_err(Error::from)
329        .map_err(|e| e.with_context("Failed to parse catalog XML"))
330}
331
332/// Parse a catalog file from a file path
333///
334/// Reads the catalog file into memory and then parses it as a string.
335#[must_use = "parsing result should be handled"]
336pub fn parse_catalog_from_file<P: AsRef<Path>>(path: P) -> Result<CatalogFile> {
337    parse_catalog_from_file_internal(path, false)
338}
339
340/// Validate catalog XML structure before parsing
341///
342/// This function performs basic XML structure validation specific to catalog files.
343pub fn validate_catalog_xml_structure(xml: &str) -> Result<()> {
344    let trimmed = xml.trim();
345
346    if trimmed.is_empty() {
347        return Err(Error::invalid_xml("Catalog XML document is empty"));
348    }
349
350    if !trimmed.starts_with("<?xml") && !trimmed.starts_with('<') {
351        return Err(Error::invalid_xml(
352            "Catalog XML document must start with XML declaration or root element",
353        ));
354    }
355
356    if !trimmed.contains("OpenSCENARIO") {
357        return Err(Error::invalid_xml(
358            "Document does not appear to contain OpenSCENARIO root element",
359        ));
360    }
361
362    if !trimmed.contains("Catalog") {
363        return Err(Error::invalid_xml(
364            "Document does not appear to contain Catalog element",
365        ));
366    }
367
368    Ok(())
369}
370
371/// Parse catalog with validation
372///
373/// Validates the XML structure before attempting to parse it.
374#[must_use = "parsing result should be handled"]
375pub fn parse_catalog_from_str_validated(xml: &str) -> Result<CatalogFile> {
376    validate_catalog_xml_structure(xml)?;
377    parse_catalog_from_str(xml)
378}
379
380/// Parse catalog file with validation
381///
382/// Validates the XML structure before attempting to parse it.
383#[must_use = "parsing result should be handled"]
384pub fn parse_catalog_from_file_validated<P: AsRef<Path>>(path: P) -> Result<CatalogFile> {
385    parse_catalog_from_file_internal(path, true)
386}
387
388/// Serialize a catalog file to XML string
389///
390/// This function uses quick-xml's serde integration to serialize
391/// our catalog types back to XML format.
392pub fn serialize_catalog_to_string(catalog: &CatalogFile) -> Result<String> {
393    let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
394    xml.push('\n');
395
396    let serialized = quick_xml::se::to_string(catalog)
397        .map_err(Error::XmlSerializeError)
398        .map_err(|e| e.with_context("Failed to serialize catalog to XML"))?;
399
400    xml.push_str(&serialized);
401    Ok(xml)
402}
403
404/// Serialize a catalog file to a file path
405///
406/// Serializes the catalog to XML and writes it to the specified file.
407pub fn serialize_catalog_to_file<P: AsRef<Path>>(catalog: &CatalogFile, path: P) -> Result<()> {
408    let xml = serialize_catalog_to_string(catalog)?;
409
410    fs::write(&path, xml).map_err(Error::from).map_err(|e| {
411        e.with_context(&format!(
412            "Failed to write catalog file: {}",
413            path.as_ref().display()
414        ))
415    })
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// The scenario validator accepts well-formed roots and rejects empty,
    /// non-XML and non-OpenSCENARIO input.
    #[test]
    fn test_validate_xml_structure() {
        // Accepted: with an XML declaration, and without one.
        let accepted = [
            r#"<?xml version="1.0"?><OpenSCENARIO></OpenSCENARIO>"#,
            r#"<OpenSCENARIO></OpenSCENARIO>"#,
        ];
        for doc in &accepted {
            assert!(validate_xml_structure(doc).is_ok());
        }

        // Rejected: empty, whitespace-only, plain text, and a foreign root element.
        let rejected = [
            "",
            "   ",
            "This is not XML",
            r#"<SomeOtherRoot></SomeOtherRoot>"#,
        ];
        for doc in &rejected {
            assert!(validate_xml_structure(doc).is_err());
        }
    }

    /// The catalog validator additionally requires a `Catalog` element.
    #[test]
    fn test_validate_catalog_xml_structure() {
        // A minimal catalog document passes.
        let with_catalog = r#"<?xml version="1.0"?>
        <OpenSCENARIO>
            <FileHeader revMajor="1" revMinor="3" date="2024-01-01T00:00:00" author="Test" description="Test"/>
            <Catalog name="test">
            </Catalog>
        </OpenSCENARIO>"#;
        assert!(validate_catalog_xml_structure(with_catalog).is_ok());

        // Rejected: a document without a Catalog element, and an empty document.
        let without_catalog = r#"<?xml version="1.0"?><OpenSCENARIO><FileHeader/></OpenSCENARIO>"#;
        for doc in &[without_catalog, ""] {
            assert!(validate_catalog_xml_structure(doc).is_err());
        }
    }

    /// Serializing a default catalog yields the XML declaration and the
    /// expected element names.
    #[test]
    fn test_catalog_serialization_roundtrip() {
        let xml = serialize_catalog_to_string(&CatalogFile::default()).unwrap();

        let expected_fragments = [
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
            "OpenSCENARIO",
            "Catalog",
        ];
        for fragment in &expected_fragments {
            assert!(xml.contains(fragment));
        }
    }
}