1use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
9use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
10use quick_xml::{Reader, Writer};
11use shape_value::ValueWord;
12use std::io::Cursor;
13use std::sync::Arc;
14
15fn parse_element(reader: &mut Reader<&[u8]>, start: &BytesStart) -> Result<ValueWord, String> {
18 let name = std::str::from_utf8(start.name().as_ref())
19 .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
20 .to_string();
21
22 let mut attr_keys = Vec::new();
24 let mut attr_values = Vec::new();
25 for attr in start.attributes() {
26 let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
27 let key = std::str::from_utf8(attr.key.as_ref())
28 .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
29 .to_string();
30 let value = attr
31 .unescape_value()
32 .map_err(|e| format!("Invalid attribute value: {}", e))?
33 .to_string();
34 attr_keys.push(ValueWord::from_string(Arc::new(key)));
35 attr_values.push(ValueWord::from_string(Arc::new(value)));
36 }
37 let attributes = ValueWord::from_hashmap_pairs(attr_keys, attr_values);
38
39 let mut children = Vec::new();
41 let mut text_parts = Vec::new();
42 let mut buf = Vec::new();
43
44 loop {
45 match reader.read_event_into(&mut buf) {
46 Ok(Event::Start(ref e)) => {
47 let child = parse_element(reader, e)?;
48 children.push(child);
49 }
50 Ok(Event::Empty(ref e)) => {
51 let child = parse_empty_element(e)?;
53 children.push(child);
54 }
55 Ok(Event::Text(ref e)) => {
56 let t = e
57 .unescape()
58 .map_err(|err| format!("Error unescaping text: {}", err))?
59 .to_string();
60 let trimmed = t.trim().to_string();
61 if !trimmed.is_empty() {
62 text_parts.push(trimmed);
63 }
64 }
65 Ok(Event::CData(ref e)) => {
66 let t = std::str::from_utf8(e.as_ref())
67 .map_err(|err| format!("Invalid UTF-8 in CDATA: {}", err))?
68 .to_string();
69 if !t.trim().is_empty() {
70 text_parts.push(t);
71 }
72 }
73 Ok(Event::End(_)) => break,
74 Ok(Event::Eof) => {
75 return Err("Unexpected end of XML".to_string());
76 }
77 Ok(_) => {} Err(e) => return Err(format!("XML parse error: {}", e)),
79 }
80 buf.clear();
81 }
82
83 let mut node_keys = vec![
85 ValueWord::from_string(Arc::new("name".to_string())),
86 ValueWord::from_string(Arc::new("attributes".to_string())),
87 ValueWord::from_string(Arc::new("children".to_string())),
88 ];
89 let mut node_values = vec![
90 ValueWord::from_string(Arc::new(name)),
91 attributes,
92 ValueWord::from_array(Arc::new(children)),
93 ];
94
95 if !text_parts.is_empty() {
96 node_keys.push(ValueWord::from_string(Arc::new("text".to_string())));
97 node_values.push(ValueWord::from_string(Arc::new(text_parts.join(""))));
98 }
99
100 Ok(ValueWord::from_hashmap_pairs(node_keys, node_values))
101}
102
103fn parse_empty_element(start: &BytesStart) -> Result<ValueWord, String> {
105 let name = std::str::from_utf8(start.name().as_ref())
106 .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
107 .to_string();
108
109 let mut attr_keys = Vec::new();
110 let mut attr_values = Vec::new();
111 for attr in start.attributes() {
112 let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
113 let key = std::str::from_utf8(attr.key.as_ref())
114 .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
115 .to_string();
116 let value = attr
117 .unescape_value()
118 .map_err(|e| format!("Invalid attribute value: {}", e))?
119 .to_string();
120 attr_keys.push(ValueWord::from_string(Arc::new(key)));
121 attr_values.push(ValueWord::from_string(Arc::new(value)));
122 }
123 let attributes = ValueWord::from_hashmap_pairs(attr_keys, attr_values);
124
125 let node_keys = vec![
126 ValueWord::from_string(Arc::new("name".to_string())),
127 ValueWord::from_string(Arc::new("attributes".to_string())),
128 ValueWord::from_string(Arc::new("children".to_string())),
129 ];
130 let node_values = vec![
131 ValueWord::from_string(Arc::new(name)),
132 attributes,
133 ValueWord::from_array(Arc::new(Vec::new())),
134 ];
135
136 Ok(ValueWord::from_hashmap_pairs(node_keys, node_values))
137}
138
139fn write_node(writer: &mut Writer<Cursor<Vec<u8>>>, node: &ValueWord) -> Result<(), String> {
141 let (keys, values, _) = node
142 .as_hashmap()
143 .ok_or_else(|| "xml.stringify(): node must be a HashMap".to_string())?;
144
145 let mut name_val = None;
147 let mut attrs_val = None;
148 let mut children_val = None;
149 let mut text_val = None;
150
151 for (k, v) in keys.iter().zip(values.iter()) {
152 match k.as_str() {
153 Some("name") => name_val = Some(v),
154 Some("attributes") => attrs_val = Some(v),
155 Some("children") => children_val = Some(v),
156 Some("text") => text_val = Some(v),
157 _ => {}
158 }
159 }
160
161 let name = name_val
162 .and_then(|v| v.as_str())
163 .ok_or_else(|| "xml.stringify(): node missing 'name' field".to_string())?;
164
165 let mut elem = BytesStart::new(name.to_string());
166
167 if let Some(attrs) = attrs_val {
169 if let Some((attr_keys, attr_values, _)) = attrs.as_hashmap() {
170 for (ak, av) in attr_keys.iter().zip(attr_values.iter()) {
171 if let (Some(key), Some(val)) = (ak.as_str(), av.as_str()) {
172 elem.push_attribute((key, val));
173 }
174 }
175 }
176 }
177
178 let has_children = children_val
180 .and_then(|v| v.as_any_array())
181 .map(|a| !a.to_generic().is_empty())
182 .unwrap_or(false);
183 let has_text = text_val.and_then(|v| v.as_str()).is_some();
184
185 if !has_children && !has_text {
186 writer
188 .write_event(Event::Empty(elem))
189 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
190 } else {
191 writer
192 .write_event(Event::Start(elem.clone()))
193 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
194
195 if let Some(text) = text_val.and_then(|v| v.as_str()) {
197 writer
198 .write_event(Event::Text(BytesText::new(text)))
199 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
200 }
201
202 if let Some(children) = children_val {
204 if let Some(arr) = children.as_any_array() {
205 for child in arr.to_generic().iter() {
206 write_node(writer, child)?;
207 }
208 }
209 }
210
211 writer
212 .write_event(Event::End(BytesEnd::new(name.to_string())))
213 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
214 }
215
216 Ok(())
217}
218
219pub fn create_xml_module() -> ModuleExports {
221 let mut module = ModuleExports::new("std::core::xml");
222 module.description = "XML parsing and serialization".to_string();
223
224 module.add_function_with_schema(
226 "parse",
227 |args: &[ValueWord], _ctx: &ModuleContext| {
228 let text = args
229 .first()
230 .and_then(|a| a.as_str())
231 .ok_or_else(|| "xml.parse() requires a string argument".to_string())?;
232
233 let mut reader = Reader::from_str(text);
234 reader.config_mut().trim_text(true);
235 let mut buf = Vec::new();
236
237 loop {
239 match reader.read_event_into(&mut buf) {
240 Ok(Event::Start(ref e)) => {
241 let result = parse_element(&mut reader, e)?;
242 return Ok(ValueWord::from_ok(result));
243 }
244 Ok(Event::Empty(ref e)) => {
245 let result = parse_empty_element(e)?;
246 return Ok(ValueWord::from_ok(result));
247 }
248 Ok(Event::Eof) => {
249 return Err("xml.parse(): no root element found".to_string());
250 }
251 Ok(_) => {} Err(e) => {
253 return Err(format!("xml.parse() failed: {}", e));
254 }
255 }
256 buf.clear();
257 }
258 },
259 ModuleFunction {
260 description: "Parse an XML string into a Shape HashMap node".to_string(),
261 params: vec![ModuleParam {
262 name: "text".to_string(),
263 type_name: "string".to_string(),
264 required: true,
265 description: "XML string to parse".to_string(),
266 ..Default::default()
267 }],
268 return_type: Some("Result<HashMap>".to_string()),
269 },
270 );
271
272 module.add_function_with_schema(
274 "stringify",
275 |args: &[ValueWord], _ctx: &ModuleContext| {
276 let value = args
277 .first()
278 .ok_or_else(|| "xml.stringify() requires a value argument".to_string())?;
279
280 let mut writer = Writer::new(Cursor::new(Vec::new()));
281 write_node(&mut writer, value)?;
282
283 let output = String::from_utf8(writer.into_inner().into_inner())
284 .map_err(|e| format!("xml.stringify(): invalid UTF-8 output: {}", e))?;
285
286 Ok(ValueWord::from_ok(ValueWord::from_string(Arc::new(output))))
287 },
288 ModuleFunction {
289 description: "Serialize a Shape HashMap node to an XML string".to_string(),
290 params: vec![ModuleParam {
291 name: "value".to_string(),
292 type_name: "HashMap".to_string(),
293 required: true,
294 description:
295 "Node value to serialize (with name, attributes, children, text? fields)"
296 .to_string(),
297 ..Default::default()
298 }],
299 return_type: Some("Result<string>".to_string()),
300 },
301 );
302
303 module
304}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal context for calling module functions directly: an empty schema
    /// registry (leaked to obtain a `'static` reference) and no VM hooks.
    fn test_ctx() -> crate::module_exports::ModuleContext<'static> {
        let registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        crate::module_exports::ModuleContext {
            schemas: registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Wraps `s` in a string `ValueWord`.
    fn string_value(s: &str) -> ValueWord {
        ValueWord::from_string(Arc::new(s.to_string()))
    }

    /// Looks up `key` in a HashMap value; `None` when `node` is not a HashMap
    /// or has no such key.
    fn field(node: &ValueWord, key: &str) -> Option<ValueWord> {
        let (keys, values, _) = node.as_hashmap()?;
        keys.iter()
            .zip(values.iter())
            .find(|(k, _)| k.as_str() == Some(key))
            .map(|(_, v)| v.clone())
    }

    /// Runs `xml.parse` on `xml` and returns the unwrapped root node,
    /// panicking if parsing fails.
    fn parse_ok(xml: &str) -> ValueWord {
        let module = create_xml_module();
        let parse_fn = module.get_export("parse").unwrap();
        let ctx = test_ctx();
        let result = parse_fn(&[string_value(xml)], &ctx).unwrap();
        result.as_ok_inner().expect("should be Ok").clone()
    }

    /// Runs `xml.stringify` on `node` and returns the produced XML text,
    /// panicking if serialization fails.
    fn stringify_ok(node: ValueWord) -> String {
        let module = create_xml_module();
        let stringify_fn = module.get_export("stringify").unwrap();
        let ctx = test_ctx();
        let result = stringify_fn(&[node], &ctx).unwrap();
        let inner = result.as_ok_inner().expect("should be Ok");
        inner.as_str().expect("should be string").to_string()
    }

    /// Builds a childless node with the given name, attributes and optional text.
    fn make_node(name: &str, attributes: ValueWord, text: Option<&str>) -> ValueWord {
        let mut keys = vec![
            string_value("name"),
            string_value("attributes"),
            string_value("children"),
        ];
        let mut values = vec![
            string_value(name),
            attributes,
            ValueWord::from_array(Arc::new(vec![])),
        ];
        if let Some(text) = text {
            keys.push(string_value("text"));
            values.push(string_value(text));
        }
        ValueWord::from_hashmap_pairs(keys, values)
    }

    #[test]
    fn test_xml_module_creation() {
        let module = create_xml_module();
        assert_eq!(module.name, "std::core::xml");
        assert!(module.has_export("parse"));
        assert!(module.has_export("stringify"));
    }

    #[test]
    fn test_xml_parse_simple() {
        let node = parse_ok("<root><child>hello</child></root>");
        let name = field(&node, "name").expect("should have a 'name' field");
        assert_eq!(name.as_str(), Some("root"));
    }

    #[test]
    fn test_xml_parse_with_attributes() {
        let node = parse_ok(r#"<person name="Alice" age="30">text</person>"#);

        // Require the fields to exist so the assertions cannot be skipped.
        let attrs = field(&node, "attributes").expect("should have an 'attributes' field");
        let (attr_keys, _attr_values, _) = attrs.as_hashmap().expect("attrs should be hashmap");
        assert_eq!(attr_keys.len(), 2);

        let alice = field(&attrs, "name").expect("should have a 'name' attribute");
        assert_eq!(alice.as_str(), Some("Alice"));

        let text = field(&node, "text").expect("should have a 'text' field");
        assert_eq!(text.as_str(), Some("text"));
    }

    #[test]
    fn test_xml_parse_nested() {
        let node = parse_ok("<config><db><host>localhost</host><port>5432</port></db></config>");

        let children = field(&node, "children").expect("should have a 'children' field");
        let arr = children
            .as_any_array()
            .expect("should be array")
            .to_generic();
        // Only <db> is a direct child of <config>.
        assert_eq!(arr.len(), 1);
    }

    #[test]
    fn test_xml_parse_self_closing() {
        let node = parse_ok(r#"<br class="spacer"/>"#);
        let name = field(&node, "name").expect("should have a 'name' field");
        assert_eq!(name.as_str(), Some("br"));
    }

    #[test]
    fn test_xml_parse_no_root() {
        let module = create_xml_module();
        let parse_fn = module.get_export("parse").unwrap();
        let ctx = test_ctx();
        let result = parse_fn(&[string_value("")], &ctx);
        assert!(result.is_err());
    }

    #[test]
    fn test_xml_parse_requires_string() {
        let module = create_xml_module();
        let parse_fn = module.get_export("parse").unwrap();
        let ctx = test_ctx();
        let result = parse_fn(&[ValueWord::from_f64(42.0)], &ctx);
        assert!(result.is_err());
    }

    #[test]
    fn test_xml_stringify_simple() {
        let node = make_node(
            "root",
            ValueWord::from_hashmap_pairs(vec![], vec![]),
            Some("hello"),
        );

        let s = stringify_ok(node);
        assert!(s.contains("<root>"));
        assert!(s.contains("hello"));
        assert!(s.contains("</root>"));
    }

    #[test]
    fn test_xml_stringify_with_attributes() {
        let attrs =
            ValueWord::from_hashmap_pairs(vec![string_value("id")], vec![string_value("42")]);
        let node = make_node("item", attrs, None);

        let s = stringify_ok(node);
        assert!(s.contains("id=\"42\""));
    }

    #[test]
    fn test_xml_stringify_self_closing() {
        let node = make_node("br", ValueWord::from_hashmap_pairs(vec![], vec![]), None);

        let s = stringify_ok(node);
        assert!(s.contains("<br/>") || s.contains("<br />"));
    }

    #[test]
    fn test_xml_roundtrip() {
        let xml_str = r#"<root><child attr="val">text</child></root>"#;
        let s = stringify_ok(parse_ok(xml_str));
        assert!(s.contains("root"));
        assert!(s.contains("child"));
        assert!(s.contains("text"));
        assert!(s.contains("attr=\"val\""));
    }

    #[test]
    fn test_xml_schemas() {
        let module = create_xml_module();

        let parse_schema = module.get_schema("parse").unwrap();
        assert_eq!(parse_schema.params.len(), 1);
        assert_eq!(parse_schema.params[0].name, "text");
        assert!(parse_schema.params[0].required);
        assert_eq!(parse_schema.return_type.as_deref(), Some("Result<HashMap>"));

        let stringify_schema = module.get_schema("stringify").unwrap();
        assert_eq!(stringify_schema.params.len(), 1);
        assert!(stringify_schema.params[0].required);
    }

    #[test]
    fn test_xml_parse_with_declaration() {
        let node = parse_ok(r#"<?xml version="1.0" encoding="UTF-8"?><root>hello</root>"#);
        let name = field(&node, "name").expect("should have a 'name' field");
        assert_eq!(name.as_str(), Some("root"));
    }
}