1use super::config::{EntityPolicy, FromXmlConfig};
21use super::conversion::{
22 items_are_list_elements, items_are_tensor_elements, items_to_list,
23 items_to_matrix_list_with_type, items_to_tensor, to_hedl_key,
24};
25use super::values::{parse_value_with_config, parse_version};
26use hedl_core::convert::parse_reference;
27use hedl_core::lex::singularize_and_capitalize;
28use hedl_core::{Document, Item, Value};
29use quick_xml::events::Event;
30use quick_xml::Reader;
31use std::collections::BTreeMap;
32
/// Upper bound on the `depth` argument accepted by the recursive parsing
/// functions in this file; they return an error once `depth` exceeds it.
const MAX_RECURSION_DEPTH: usize = 100;
35
36pub fn from_xml(xml: &str, config: &FromXmlConfig) -> Result<Document, String> {
38 if config.entity_policy == EntityPolicy::RejectDtd
40 && (xml.contains("<!DOCTYPE") || xml.contains("<!ENTITY"))
41 {
42 return Err("DOCTYPE declarations rejected by entity policy (XXE prevention)".to_string());
43 }
44
45 let mut reader = Reader::from_str(xml);
46 reader.config_mut().trim_text(false);
49
50 let mut doc = Document::new(config.version);
51
52 loop {
54 match reader.read_event() {
55 Ok(Event::DocType(e)) => {
56 if config.log_security_events {
57 eprintln!(
58 "[SECURITY] DTD detected in XML input at position {}: {:?}",
59 reader.buffer_position(),
60 String::from_utf8_lossy(&e)
61 );
62 }
63
64 match config.entity_policy {
65 EntityPolicy::RejectDtd => {
66 return Err(format!(
67 "DOCTYPE declaration rejected at position {} (XXE prevention policy)",
68 reader.buffer_position()
69 ));
70 }
71 EntityPolicy::WarnOnEntities => {
72 eprintln!(
73 "[WARNING] DOCTYPE detected in XML. External entities are NOT processed by quick-xml."
74 );
75 }
76 EntityPolicy::AllowDtdNoExternal => {
77 }
79 }
80 }
81 Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
82 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
83
84 for attr in e.attributes().flatten() {
86 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
87 let value = String::from_utf8_lossy(&attr.value).to_string();
88 if key == "version" {
89 if let Some((major, minor)) = parse_version(&value) {
90 doc.version = (major, minor);
91 }
92 }
93 }
94
95 doc.root = parse_children(&mut reader, &name, config, &mut doc.structs, 0)?;
97 break;
98 }
99 Ok(Event::Eof) => break,
100 Err(e) => {
101 return Err(format!(
102 "XML parse error at position {}: {}",
103 reader.buffer_position(),
104 e
105 ))
106 }
107 _ => {}
108 }
109 }
110
111 Ok(doc)
112}
113
114pub(crate) fn parse_children(
115 reader: &mut Reader<&[u8]>,
116 parent_name: &str,
117 config: &FromXmlConfig,
118 structs: &mut BTreeMap<String, Vec<String>>,
119 depth: usize,
120) -> Result<BTreeMap<String, Item>, String> {
121 if depth > MAX_RECURSION_DEPTH {
123 return Err(format!(
124 "XML recursion depth exceeded (max: {})",
125 MAX_RECURSION_DEPTH
126 ));
127 }
128 let mut children = BTreeMap::new();
129 let mut element_counts: BTreeMap<String, (Vec<Item>, Option<String>)> = BTreeMap::new();
131
132 loop {
133 match reader.read_event() {
134 Ok(Event::Start(e)) => {
135 let raw_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
136 let name = to_hedl_key(&raw_name);
137
138 let explicit_type = e.attributes().flatten().find_map(|attr| {
141 let key = String::from_utf8_lossy(attr.key.as_ref());
142 if key == "type" {
143 let value = String::from_utf8_lossy(&attr.value).to_string();
144 if value
147 .chars()
148 .next()
149 .map(|c| c.is_ascii_uppercase())
150 .unwrap_or(false)
151 {
152 Some(value)
153 } else {
154 None
155 }
156 } else {
157 None
158 }
159 });
160
161 let elem_owned = e.to_owned();
162 let item = parse_element(reader, &elem_owned, config, structs, depth + 1)?;
163
164 if config.infer_lists {
166 let entry = element_counts
167 .entry(name.clone())
168 .or_insert((Vec::new(), None));
169 entry.0.push(item);
170 if entry.1.is_none() && explicit_type.is_some() {
172 entry.1 = explicit_type;
173 }
174 } else {
175 if children.contains_key(&name) {
177 return Err(format!(
178 "Duplicate element '{}' found with infer_lists=false. \
179 Enable infer_lists to automatically collect duplicates into a list.",
180 name
181 ));
182 }
183 children.insert(name, item);
184 }
185 }
186 Ok(Event::Empty(e)) => {
187 let raw_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
188 let name = to_hedl_key(&raw_name);
189
190 let explicit_type = e.attributes().flatten().find_map(|attr| {
193 let key = String::from_utf8_lossy(attr.key.as_ref());
194 if key == "type" {
195 let value = String::from_utf8_lossy(&attr.value).to_string();
196 if value
199 .chars()
200 .next()
201 .map(|c| c.is_ascii_uppercase())
202 .unwrap_or(false)
203 {
204 Some(value)
205 } else {
206 None
207 }
208 } else {
209 None
210 }
211 });
212
213 let elem_owned = e.to_owned();
214 let item = parse_empty_element(&elem_owned, config)?;
215
216 if config.infer_lists {
217 let entry = element_counts
218 .entry(name.clone())
219 .or_insert((Vec::new(), None));
220 entry.0.push(item);
221 if entry.1.is_none() && explicit_type.is_some() {
222 entry.1 = explicit_type;
223 }
224 } else {
225 if children.contains_key(&name) {
227 return Err(format!(
228 "Duplicate element '{}' found with infer_lists=false. \
229 Enable infer_lists to automatically collect duplicates into a list.",
230 name
231 ));
232 }
233 children.insert(name, item);
234 }
235 }
236 Ok(Event::End(e)) => {
237 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
238 if name == parent_name {
239 break;
240 }
241 }
242 Ok(Event::Eof) => break,
243 Err(e) => return Err(format!("XML parse error: {}", e)),
244 _ => {}
245 }
246 }
247
248 if config.infer_lists {
250 for (name, (items, explicit_type)) in element_counts {
251 if items.len() > 1 {
252 let list =
254 items_to_matrix_list_with_type(&name, items, explicit_type, config, structs)?;
255 children.insert(name, Item::List(list));
256 } else if let Some(item) = items.into_iter().next() {
257 if let Some(ref type_name) = explicit_type {
259 if let Item::Object(inner) = &item {
263 let expected_child = type_name.to_lowercase();
265 if let Some((_, child_item)) = inner
266 .iter()
267 .find(|(k, _)| k.to_lowercase() == expected_child)
268 {
269 match child_item {
270 Item::List(list) => {
271 let mut new_list = list.clone();
273 new_list.type_name = type_name.clone();
274 structs.insert(type_name.clone(), new_list.schema.clone());
275 children.insert(name, Item::List(new_list));
276 continue;
277 }
278 Item::Object(obj) => {
279 let list = items_to_matrix_list_with_type(
281 &expected_child,
282 vec![Item::Object(obj.clone())],
283 Some(type_name.clone()),
284 config,
285 structs,
286 )?;
287 children.insert(name, Item::List(list));
288 continue;
289 }
290 _ => {}
291 }
292 }
293 }
294 let list = items_to_matrix_list_with_type(
296 &name,
297 vec![item],
298 explicit_type,
299 config,
300 structs,
301 )?;
302 children.insert(name, Item::List(list));
303 } else {
304 children.insert(name, item);
305 }
306 }
307 }
308 }
309
310 Ok(children)
311}
312
313pub(crate) fn parse_element(
314 reader: &mut Reader<&[u8]>,
315 elem: &quick_xml::events::BytesStart<'_>,
316 config: &FromXmlConfig,
317 structs: &mut BTreeMap<String, Vec<String>>,
318 depth: usize,
319) -> Result<Item, String> {
320 if depth > MAX_RECURSION_DEPTH {
322 return Err(format!(
323 "XML recursion depth exceeded (max: {})",
324 MAX_RECURSION_DEPTH
325 ));
326 }
327 let name = String::from_utf8_lossy(elem.name().as_ref()).to_string();
328
329 let mut attributes = BTreeMap::new();
331 let mut is_reference = false;
332 for attr in elem.attributes().flatten() {
333 let raw_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
334 let value = String::from_utf8_lossy(&attr.value).to_string();
335
336 if raw_key == "__hedl_type__" {
338 if value == "ref" {
339 is_reference = true;
340 }
341 continue; }
343 if raw_key == "__hedl_child__" {
344 continue; }
346 let key = to_hedl_key(&raw_key);
350 attributes.insert(key, value);
351 }
352
353 let mut text_content = String::new();
355 let mut child_elements: BTreeMap<String, Vec<Item>> = BTreeMap::new();
356 let mut marked_children: BTreeMap<String, Vec<Item>> = BTreeMap::new(); let mut has_children = false;
358
359 loop {
360 match reader.read_event() {
361 Ok(Event::Start(e)) => {
362 has_children = true;
363 let raw_child_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
364 let child_name = to_hedl_key(&raw_child_name);
365
366 let is_marked_child = e.attributes().any(|attr| {
368 if let Ok(attr) = attr {
369 let key = String::from_utf8_lossy(attr.key.as_ref());
370 let val = String::from_utf8_lossy(&attr.value);
371 key == "__hedl_child__" && val == "true"
372 } else {
373 false
374 }
375 });
376
377 let elem_owned = e.to_owned();
378 let child_item = parse_element(reader, &elem_owned, config, structs, depth + 1)?;
379
380 if is_marked_child {
381 marked_children
382 .entry(raw_child_name)
383 .or_default()
384 .push(child_item);
385 } else {
386 child_elements
387 .entry(child_name)
388 .or_default()
389 .push(child_item);
390 }
391 }
392 Ok(Event::Empty(e)) => {
393 has_children = true;
394 let raw_child_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
395 let child_name = to_hedl_key(&raw_child_name);
396
397 let is_marked_child = e.attributes().any(|attr| {
399 if let Ok(attr) = attr {
400 let key = String::from_utf8_lossy(attr.key.as_ref());
401 let val = String::from_utf8_lossy(&attr.value);
402 key == "__hedl_child__" && val == "true"
403 } else {
404 false
405 }
406 });
407
408 let elem_owned = e.to_owned();
409 let child_item = parse_empty_element(&elem_owned, config)?;
410
411 if is_marked_child {
412 marked_children
413 .entry(raw_child_name)
414 .or_default()
415 .push(child_item);
416 } else {
417 child_elements
418 .entry(child_name)
419 .or_default()
420 .push(child_item);
421 }
422 }
423 Ok(Event::Text(e)) => {
424 let content = e
425 .xml_content()
426 .map_err(|e| format!("Text decode error: {}", e))?;
427 text_content.push_str(&content);
428 }
429 Ok(Event::GeneralRef(e)) => {
430 let ref_name = e.decode().map_err(|e| format!("Ref decode error: {}", e))?;
432 let unescaped = match ref_name.as_ref() {
433 "amp" => "&",
434 "lt" => "<",
435 "gt" => ">",
436 "quot" => "\"",
437 "apos" => "'",
438 _ => return Err(format!("Unknown entity reference: {}", ref_name)),
439 };
440 text_content.push_str(unescaped);
441 }
442 Ok(Event::End(e)) => {
443 let end_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
444 if end_name == name {
445 break;
446 }
447 }
448 Ok(Event::Eof) => break,
449 Err(e) => return Err(format!("XML parse error: {}", e)),
450 _ => {}
451 }
452 }
453
454 if has_children {
456 let mut result_children = BTreeMap::new();
458 for (child_name, items) in child_elements {
459 if items.len() > 1 {
460 if config.infer_lists {
461 if child_name == "item" && items_are_tensor_elements(&items) {
470 let tensor = items_to_tensor(&items)?;
472 result_children
473 .insert(child_name, Item::Scalar(Value::Tensor(Box::new(tensor))));
474 } else if child_name == "item" && items_are_list_elements(&items) {
475 let list = items_to_list(&items)?;
477 result_children
478 .insert(child_name, Item::Scalar(Value::List(Box::new(list))));
479 } else {
480 use super::conversion::items_to_matrix_list;
482 let list = items_to_matrix_list(&child_name, items, config, structs)?;
483 result_children.insert(child_name, Item::List(list));
484 }
485 } else {
486 return Err(format!(
488 "Duplicate element '{}' found with infer_lists=false. \
489 Enable infer_lists to automatically collect duplicates into a list.",
490 child_name
491 ));
492 }
493 } else if let Some(item) = items.into_iter().next() {
494 result_children.insert(child_name, item);
495 }
496 }
497
498 for (child_type_raw, child_items) in marked_children {
501 if !child_items.is_empty() {
502 use super::conversion::items_to_matrix_list;
504 let list = items_to_matrix_list(&child_type_raw, child_items, config, structs)?;
505 let child_key = to_hedl_key(&child_type_raw);
506 result_children.insert(child_key, Item::List(list));
507 }
508 }
509
510 for (key, value_str) in attributes {
512 let value = parse_value_with_config(&value_str, config)?;
513 result_children.insert(key, Item::Scalar(value));
514 }
515
516 if !text_content.trim().is_empty() {
518 let value = if is_reference {
519 Value::Reference(parse_reference(text_content.trim())?)
520 } else {
521 parse_value_with_config(&text_content, config)?
522 };
523 result_children.insert("_text".to_string(), Item::Scalar(value));
524 }
525
526 if result_children.len() == 1 {
534 let (child_key, child_item) = result_children.iter().next().expect("len == 1");
536
537 if let Item::List(list) = child_item {
539 let has_nested_children = list
541 .rows
542 .iter()
543 .any(|node| node.children().map(|c| !c.is_empty()).unwrap_or(false));
544 if !has_nested_children {
545 let parent_singular =
549 singularize_and_capitalize(&to_hedl_key(&name)).to_lowercase();
550 let child_type = singularize_and_capitalize(child_key).to_lowercase();
551 if parent_singular == child_type {
552 return Ok(result_children.into_values().next().expect("len == 1"));
555 }
556 }
557 }
558
559 if let Item::Scalar(Value::List(_)) = child_item {
562 if child_key == "item" {
565 return Ok(result_children.into_values().next().expect("len == 1"));
567 }
568 }
569 }
570
571 Ok(Item::Object(result_children))
573 } else if !text_content.trim().is_empty() {
574 let value = if is_reference {
576 Value::Reference(parse_reference(text_content.trim())?)
578 } else {
579 parse_value_with_config(&text_content, config)?
580 };
581
582 if !attributes.is_empty() {
584 let mut obj = BTreeMap::new();
585 obj.insert("_text".to_string(), Item::Scalar(value));
586 for (key, value_str) in attributes {
587 let attr_value = parse_value_with_config(&value_str, config)?;
588 obj.insert(key, Item::Scalar(attr_value));
589 }
590 Ok(Item::Object(obj))
591 } else {
592 Ok(Item::Scalar(value))
593 }
594 } else if !attributes.is_empty() {
595 let mut obj = BTreeMap::new();
597 for (key, value_str) in attributes {
598 let value = parse_value_with_config(&value_str, config)?;
599 obj.insert(key, Item::Scalar(value));
600 }
601 Ok(Item::Object(obj))
602 } else {
603 Ok(Item::Scalar(Value::Null))
605 }
606}
607
608pub(crate) fn parse_empty_element(
609 elem: &quick_xml::events::BytesStart<'_>,
610 config: &FromXmlConfig,
611) -> Result<Item, String> {
612 let mut attributes = BTreeMap::new();
613
614 for attr in elem.attributes().flatten() {
615 let raw_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
616 let key = to_hedl_key(&raw_key);
617 let value = String::from_utf8_lossy(&attr.value).to_string();
618 attributes.insert(key, value);
619 }
620
621 if attributes.is_empty() {
622 Ok(Item::Scalar(Value::Null))
623 } else if attributes.len() == 1 && attributes.contains_key("value") {
624 let value_str = attributes.get("value").expect("key exists");
627 let value = parse_value_with_config(value_str, config)?;
628 Ok(Item::Scalar(value))
629 } else {
630 let mut obj = BTreeMap::new();
632 for (key, value_str) in attributes {
633 let value = parse_value_with_config(&value_str, config)?;
634 obj.insert(key, Item::Scalar(value));
635 }
636 Ok(Item::Object(obj))
637 }
638}