Skip to main content

shape_runtime/stdlib/
xml.rs

//! Native `xml` module for XML parsing and serialization.
//!
//! Exports: xml.parse(text), xml.stringify(value)
//!
//! XML nodes are represented as Shape HashMaps with structure:
//! `{ name: string, attributes: HashMap, children: Array, text?: string }`
7
8use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
9use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
10use quick_xml::{Reader, Writer};
11use shape_value::ValueWord;
12use std::io::Cursor;
13use std::sync::Arc;
14
15/// Parse an XML element recursively from a quick-xml reader.
16/// Returns a ValueWord HashMap: { name, attributes, children, text? }
17fn parse_element(reader: &mut Reader<&[u8]>, start: &BytesStart) -> Result<ValueWord, String> {
18    let name = std::str::from_utf8(start.name().as_ref())
19        .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
20        .to_string();
21
22    // Parse attributes
23    let mut attr_keys = Vec::new();
24    let mut attr_values = Vec::new();
25    for attr in start.attributes() {
26        let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
27        let key = std::str::from_utf8(attr.key.as_ref())
28            .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
29            .to_string();
30        let value = attr
31            .unescape_value()
32            .map_err(|e| format!("Invalid attribute value: {}", e))?
33            .to_string();
34        attr_keys.push(ValueWord::from_string(Arc::new(key)));
35        attr_values.push(ValueWord::from_string(Arc::new(value)));
36    }
37    let attributes = ValueWord::from_hashmap_pairs(attr_keys, attr_values);
38
39    // Parse children and text
40    let mut children = Vec::new();
41    let mut text_parts = Vec::new();
42    let mut buf = Vec::new();
43
44    loop {
45        match reader.read_event_into(&mut buf) {
46            Ok(Event::Start(ref e)) => {
47                let child = parse_element(reader, e)?;
48                children.push(child);
49            }
50            Ok(Event::Empty(ref e)) => {
51                // Self-closing element — treated as element with no children
52                let child = parse_empty_element(e)?;
53                children.push(child);
54            }
55            Ok(Event::Text(ref e)) => {
56                let t = e
57                    .unescape()
58                    .map_err(|err| format!("Error unescaping text: {}", err))?
59                    .to_string();
60                let trimmed = t.trim().to_string();
61                if !trimmed.is_empty() {
62                    text_parts.push(trimmed);
63                }
64            }
65            Ok(Event::CData(ref e)) => {
66                let t = std::str::from_utf8(e.as_ref())
67                    .map_err(|err| format!("Invalid UTF-8 in CDATA: {}", err))?
68                    .to_string();
69                if !t.trim().is_empty() {
70                    text_parts.push(t);
71                }
72            }
73            Ok(Event::End(_)) => break,
74            Ok(Event::Eof) => {
75                return Err("Unexpected end of XML".to_string());
76            }
77            Ok(_) => {} // Skip comments, PI, etc.
78            Err(e) => return Err(format!("XML parse error: {}", e)),
79        }
80        buf.clear();
81    }
82
83    // Build the node HashMap: { name, attributes, children, text? }
84    let mut node_keys = vec![
85        ValueWord::from_string(Arc::new("name".to_string())),
86        ValueWord::from_string(Arc::new("attributes".to_string())),
87        ValueWord::from_string(Arc::new("children".to_string())),
88    ];
89    let mut node_values = vec![
90        ValueWord::from_string(Arc::new(name)),
91        attributes,
92        ValueWord::from_array(Arc::new(children)),
93    ];
94
95    if !text_parts.is_empty() {
96        node_keys.push(ValueWord::from_string(Arc::new("text".to_string())));
97        node_values.push(ValueWord::from_string(Arc::new(text_parts.join(""))));
98    }
99
100    Ok(ValueWord::from_hashmap_pairs(node_keys, node_values))
101}
102
103/// Parse a self-closing XML element (e.g. `<br/>`).
104fn parse_empty_element(start: &BytesStart) -> Result<ValueWord, String> {
105    let name = std::str::from_utf8(start.name().as_ref())
106        .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
107        .to_string();
108
109    let mut attr_keys = Vec::new();
110    let mut attr_values = Vec::new();
111    for attr in start.attributes() {
112        let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
113        let key = std::str::from_utf8(attr.key.as_ref())
114            .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
115            .to_string();
116        let value = attr
117            .unescape_value()
118            .map_err(|e| format!("Invalid attribute value: {}", e))?
119            .to_string();
120        attr_keys.push(ValueWord::from_string(Arc::new(key)));
121        attr_values.push(ValueWord::from_string(Arc::new(value)));
122    }
123    let attributes = ValueWord::from_hashmap_pairs(attr_keys, attr_values);
124
125    let node_keys = vec![
126        ValueWord::from_string(Arc::new("name".to_string())),
127        ValueWord::from_string(Arc::new("attributes".to_string())),
128        ValueWord::from_string(Arc::new("children".to_string())),
129    ];
130    let node_values = vec![
131        ValueWord::from_string(Arc::new(name)),
132        attributes,
133        ValueWord::from_array(Arc::new(Vec::new())),
134    ];
135
136    Ok(ValueWord::from_hashmap_pairs(node_keys, node_values))
137}
138
139/// Write a Shape node HashMap to XML using quick-xml Writer.
140fn write_node(writer: &mut Writer<Cursor<Vec<u8>>>, node: &ValueWord) -> Result<(), String> {
141    let (keys, values, _) = node
142        .as_hashmap()
143        .ok_or_else(|| "xml.stringify(): node must be a HashMap".to_string())?;
144
145    // Extract fields by key
146    let mut name_val = None;
147    let mut attrs_val = None;
148    let mut children_val = None;
149    let mut text_val = None;
150
151    for (k, v) in keys.iter().zip(values.iter()) {
152        match k.as_str() {
153            Some("name") => name_val = Some(v),
154            Some("attributes") => attrs_val = Some(v),
155            Some("children") => children_val = Some(v),
156            Some("text") => text_val = Some(v),
157            _ => {}
158        }
159    }
160
161    let name = name_val
162        .and_then(|v| v.as_str())
163        .ok_or_else(|| "xml.stringify(): node missing 'name' field".to_string())?;
164
165    let mut elem = BytesStart::new(name.to_string());
166
167    // Add attributes
168    if let Some(attrs) = attrs_val {
169        if let Some((attr_keys, attr_values, _)) = attrs.as_hashmap() {
170            for (ak, av) in attr_keys.iter().zip(attr_values.iter()) {
171                if let (Some(key), Some(val)) = (ak.as_str(), av.as_str()) {
172                    elem.push_attribute((key, val));
173                }
174            }
175        }
176    }
177
178    // Check if there are children or text
179    let has_children = children_val
180        .and_then(|v| v.as_any_array())
181        .map(|a| !a.to_generic().is_empty())
182        .unwrap_or(false);
183    let has_text = text_val.and_then(|v| v.as_str()).is_some();
184
185    if !has_children && !has_text {
186        // Self-closing
187        writer
188            .write_event(Event::Empty(elem))
189            .map_err(|e| format!("xml.stringify() write error: {}", e))?;
190    } else {
191        writer
192            .write_event(Event::Start(elem.clone()))
193            .map_err(|e| format!("xml.stringify() write error: {}", e))?;
194
195        // Write text
196        if let Some(text) = text_val.and_then(|v| v.as_str()) {
197            writer
198                .write_event(Event::Text(BytesText::new(text)))
199                .map_err(|e| format!("xml.stringify() write error: {}", e))?;
200        }
201
202        // Write children
203        if let Some(children) = children_val {
204            if let Some(arr) = children.as_any_array() {
205                for child in arr.to_generic().iter() {
206                    write_node(writer, child)?;
207                }
208            }
209        }
210
211        writer
212            .write_event(Event::End(BytesEnd::new(name.to_string())))
213            .map_err(|e| format!("xml.stringify() write error: {}", e))?;
214    }
215
216    Ok(())
217}
218
219/// Create the `xml` module with XML parsing and serialization functions.
220pub fn create_xml_module() -> ModuleExports {
221    let mut module = ModuleExports::new("std::core::xml");
222    module.description = "XML parsing and serialization".to_string();
223
224    // xml.parse(text: string) -> Result<HashMap>
225    module.add_function_with_schema(
226        "parse",
227        |args: &[ValueWord], _ctx: &ModuleContext| {
228            let text = args
229                .first()
230                .and_then(|a| a.as_str())
231                .ok_or_else(|| "xml.parse() requires a string argument".to_string())?;
232
233            let mut reader = Reader::from_str(text);
234            reader.config_mut().trim_text(true);
235            let mut buf = Vec::new();
236
237            // Find the root element
238            loop {
239                match reader.read_event_into(&mut buf) {
240                    Ok(Event::Start(ref e)) => {
241                        let result = parse_element(&mut reader, e)?;
242                        return Ok(ValueWord::from_ok(result));
243                    }
244                    Ok(Event::Empty(ref e)) => {
245                        let result = parse_empty_element(e)?;
246                        return Ok(ValueWord::from_ok(result));
247                    }
248                    Ok(Event::Eof) => {
249                        return Err("xml.parse(): no root element found".to_string());
250                    }
251                    Ok(_) => {} // Skip declaration, comments, PI
252                    Err(e) => {
253                        return Err(format!("xml.parse() failed: {}", e));
254                    }
255                }
256                buf.clear();
257            }
258        },
259        ModuleFunction {
260            description: "Parse an XML string into a Shape HashMap node".to_string(),
261            params: vec![ModuleParam {
262                name: "text".to_string(),
263                type_name: "string".to_string(),
264                required: true,
265                description: "XML string to parse".to_string(),
266                ..Default::default()
267            }],
268            return_type: Some("Result<HashMap>".to_string()),
269        },
270    );
271
272    // xml.stringify(value: HashMap) -> Result<string>
273    module.add_function_with_schema(
274        "stringify",
275        |args: &[ValueWord], _ctx: &ModuleContext| {
276            let value = args
277                .first()
278                .ok_or_else(|| "xml.stringify() requires a value argument".to_string())?;
279
280            let mut writer = Writer::new(Cursor::new(Vec::new()));
281            write_node(&mut writer, value)?;
282
283            let output = String::from_utf8(writer.into_inner().into_inner())
284                .map_err(|e| format!("xml.stringify(): invalid UTF-8 output: {}", e))?;
285
286            Ok(ValueWord::from_ok(ValueWord::from_string(Arc::new(output))))
287        },
288        ModuleFunction {
289            description: "Serialize a Shape HashMap node to an XML string".to_string(),
290            params: vec![ModuleParam {
291                name: "value".to_string(),
292                type_name: "HashMap".to_string(),
293                required: true,
294                description:
295                    "Node value to serialize (with name, attributes, children, text? fields)"
296                        .to_string(),
297                ..Default::default()
298            }],
299            return_type: Some("Result<string>".to_string()),
300        },
301    );
302
303    module
304}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// Context with an empty schema registry and every optional hook unset.
    /// The registry is leaked to satisfy the `'static` lifetime.
    fn test_ctx() -> crate::module_exports::ModuleContext<'static> {
        let registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        crate::module_exports::ModuleContext {
            schemas: registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Wrap a string slice in a `ValueWord` string.
    fn string_value(text: &str) -> ValueWord {
        ValueWord::from_string(Arc::new(text.to_string()))
    }

    /// Look up `key` in a HashMap value, returning a copy of the entry.
    /// Returns `None` when `node` is not a HashMap or has no such key.
    fn field(node: &ValueWord, key: &str) -> Option<ValueWord> {
        let (keys, values, _) = node.as_hashmap()?;
        for (k, v) in keys.iter().zip(values.iter()) {
            if k.as_str() == Some(key) {
                return Some(v.clone());
            }
        }
        None
    }

    /// Look up `key` and return it as an owned string, if it is a string.
    fn str_field(node: &ValueWord, key: &str) -> Option<String> {
        field(node, key).and_then(|v| v.as_str().map(|s| s.to_string()))
    }

    /// Run `xml.parse` on `xml` and return the node inside the Ok result.
    fn parse_ok(xml: &str) -> ValueWord {
        let module = create_xml_module();
        let parse_fn = module.get_export("parse").unwrap();
        let ctx = test_ctx();
        let result = parse_fn(&[string_value(xml)], &ctx).unwrap();
        let inner = result.as_ok_inner().expect("should be Ok");
        inner.clone()
    }

    /// Run `xml.stringify` on `node` and return the produced text.
    fn stringify_ok(node: ValueWord) -> String {
        let module = create_xml_module();
        let stringify_fn = module.get_export("stringify").unwrap();
        let ctx = test_ctx();
        let result = stringify_fn(&[node], &ctx).unwrap();
        let inner = result.as_ok_inner().expect("should be Ok");
        inner.as_str().expect("should be string").to_string()
    }

    /// Build a node HashMap with no children; `text` is added only when given.
    fn leaf_node(name: &str, attrs: &[(&str, &str)], text: Option<&str>) -> ValueWord {
        let attr_keys = attrs.iter().map(|(k, _)| string_value(k)).collect();
        let attr_values = attrs.iter().map(|(_, v)| string_value(v)).collect();

        let mut node_keys = vec![
            string_value("name"),
            string_value("attributes"),
            string_value("children"),
        ];
        let mut node_values = vec![
            string_value(name),
            ValueWord::from_hashmap_pairs(attr_keys, attr_values),
            ValueWord::from_array(Arc::new(vec![])),
        ];
        if let Some(text) = text {
            node_keys.push(string_value("text"));
            node_values.push(string_value(text));
        }
        ValueWord::from_hashmap_pairs(node_keys, node_values)
    }

    #[test]
    fn test_xml_module_creation() {
        let module = create_xml_module();
        assert_eq!(module.name, "std::core::xml");
        assert!(module.has_export("parse"));
        assert!(module.has_export("stringify"));
    }

    #[test]
    fn test_xml_parse_simple() {
        let root = parse_ok("<root><child>hello</child></root>");
        assert_eq!(str_field(&root, "name").as_deref(), Some("root"));

        let children = field(&root, "children").expect("should have a 'children' field");
        let arr = children.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 1);

        let child = arr.iter().next().expect("should have one child");
        assert_eq!(str_field(child, "name").as_deref(), Some("child"));
        assert_eq!(str_field(child, "text").as_deref(), Some("hello"));
    }

    #[test]
    fn test_xml_parse_with_attributes() {
        let node = parse_ok(r#"<person name="Alice" age="30">text</person>"#);

        let attrs = field(&node, "attributes").expect("should have an 'attributes' field");
        let (attr_keys, _attr_values, _) = attrs.as_hashmap().expect("attrs should be hashmap");
        assert_eq!(attr_keys.len(), 2);
        assert_eq!(str_field(&attrs, "name").as_deref(), Some("Alice"));
        assert_eq!(str_field(&attrs, "age").as_deref(), Some("30"));

        assert_eq!(str_field(&node, "text").as_deref(), Some("text"));
    }

    #[test]
    fn test_xml_parse_nested() {
        let config =
            parse_ok("<config><db><host>localhost</host><port>5432</port></db></config>");

        let children = field(&config, "children").expect("should have a 'children' field");
        let arr = children.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 1); // <db>

        let db = arr.iter().next().expect("should have one child");
        assert_eq!(str_field(db, "name").as_deref(), Some("db"));

        let db_children = field(db, "children").expect("db should have a 'children' field");
        let db_arr = db_children
            .as_any_array()
            .expect("should be array")
            .to_generic();
        assert_eq!(db_arr.len(), 2); // <host>, <port>
    }

    #[test]
    fn test_xml_parse_self_closing() {
        let node = parse_ok(r#"<br class="spacer"/>"#);
        assert_eq!(str_field(&node, "name").as_deref(), Some("br"));

        let attrs = field(&node, "attributes").expect("should have an 'attributes' field");
        assert_eq!(str_field(&attrs, "class").as_deref(), Some("spacer"));

        let children = field(&node, "children").expect("should have a 'children' field");
        let arr = children.as_any_array().expect("should be array").to_generic();
        assert_eq!(arr.len(), 0);

        assert!(field(&node, "text").is_none());
    }

    #[test]
    fn test_xml_parse_no_root() {
        let module = create_xml_module();
        let parse_fn = module.get_export("parse").unwrap();
        let ctx = test_ctx();
        let result = parse_fn(&[string_value("")], &ctx);
        assert!(result.is_err());
    }

    #[test]
    fn test_xml_parse_requires_string() {
        let module = create_xml_module();
        let parse_fn = module.get_export("parse").unwrap();
        let ctx = test_ctx();
        let result = parse_fn(&[ValueWord::from_f64(42.0)], &ctx);
        assert!(result.is_err());
    }

    #[test]
    fn test_xml_stringify_simple() {
        // { name: "root", attributes: {}, children: [], text: "hello" }
        let s = stringify_ok(leaf_node("root", &[], Some("hello")));
        assert!(s.contains("<root>"));
        assert!(s.contains("hello"));
        assert!(s.contains("</root>"));
    }

    #[test]
    fn test_xml_stringify_with_attributes() {
        let s = stringify_ok(leaf_node("item", &[("id", "42")], None));
        assert!(s.contains("id=\"42\""));
    }

    #[test]
    fn test_xml_stringify_self_closing() {
        let s = stringify_ok(leaf_node("br", &[], None));
        assert!(s.contains("<br/>") || s.contains("<br />"));
    }

    #[test]
    fn test_xml_roundtrip() {
        let parsed = parse_ok(r#"<root><child attr="val">text</child></root>"#);
        let s = stringify_ok(parsed);
        assert!(s.contains("root"));
        assert!(s.contains("child"));
        assert!(s.contains("attr=\"val\""));
        assert!(s.contains("text"));
    }

    #[test]
    fn test_xml_schemas() {
        let module = create_xml_module();

        let parse_schema = module.get_schema("parse").unwrap();
        assert_eq!(parse_schema.params.len(), 1);
        assert_eq!(parse_schema.params[0].name, "text");
        assert!(parse_schema.params[0].required);
        assert_eq!(parse_schema.return_type.as_deref(), Some("Result<HashMap>"));

        let stringify_schema = module.get_schema("stringify").unwrap();
        assert_eq!(stringify_schema.params.len(), 1);
        assert!(stringify_schema.params[0].required);
    }

    #[test]
    fn test_xml_parse_with_declaration() {
        let root = parse_ok(r#"<?xml version="1.0" encoding="UTF-8"?><root>hello</root>"#);
        assert_eq!(str_field(&root, "name").as_deref(), Some("root"));
        assert_eq!(str_field(&root, "text").as_deref(), Some("hello"));
    }
}