hedl_xml/
from_xml.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! XML to HEDL conversion
19
20use hedl_core::convert::parse_reference;
21use hedl_core::lex::Tensor;
22use hedl_core::lex::{parse_expression_token, singularize_and_capitalize};
23use hedl_core::{Document, Item, MatrixList, Node, Value};
24use quick_xml::events::Event;
25use quick_xml::Reader;
26use std::collections::BTreeMap;
27
/// Maximum recursion depth for XML parsing (prevents stack overflow).
///
/// `parse_children` and `parse_element` return an error as soon as their
/// `depth` argument is greater than this value.
const MAX_RECURSION_DEPTH: usize = 100;
30
/// Policy for handling XML entities and DTDs
///
/// Consulted by `from_xml` when a DOCTYPE declaration is encountered and by
/// `parse_value_with_config` when a value looks like it contains an entity
/// reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum EntityPolicy {
    /// Reject XML with DOCTYPE declarations (strictest, recommended)
    ///
    /// `from_xml` returns an error if the input contains `<!DOCTYPE` or
    /// `<!ENTITY`, or if the reader reports a DOCTYPE event.
    RejectDtd,
    /// Allow DOCTYPE but never resolve external entities (default)
    #[default]
    AllowDtdNoExternal,
    /// Log warnings when DTDs or entity references detected
    ///
    /// Warnings are written to stderr; parsing continues.
    WarnOnEntities,
}
42
/// Configuration for XML import
///
/// `Default` yields type name `"Item"`, version `(1, 0)`, list inference on,
/// the default [`EntityPolicy`] and security logging off.
#[derive(Debug, Clone)]
pub struct FromXmlConfig {
    /// Default type name for list items without metadata
    pub default_type_name: String,
    /// HEDL version to use
    ///
    /// A parseable `version` attribute on the root element overrides this
    /// value in the produced document.
    pub version: (u32, u32),
    /// Try to infer list structures from repeated elements
    ///
    /// When `false`, repeated sibling elements with the same key are reported
    /// as an error instead of being collected into a list.
    pub infer_lists: bool,

    /// Entity handling policy (XXE prevention)
    pub entity_policy: EntityPolicy,

    /// Enable security event logging
    ///
    /// Events are printed to stderr.
    pub log_security_events: bool,
}
59
60impl Default for FromXmlConfig {
61    fn default() -> Self {
62        Self {
63            default_type_name: "Item".to_string(),
64            version: (1, 0),
65            infer_lists: true,
66            entity_policy: EntityPolicy::default(),
67            log_security_events: false,
68        }
69    }
70}
71
72impl FromXmlConfig {
73    /// Create a config with strict security (reject DTDs entirely)
74    pub fn strict_security() -> Self {
75        Self {
76            entity_policy: EntityPolicy::RejectDtd,
77            log_security_events: true,
78            ..Default::default()
79        }
80    }
81}
82
83impl hedl_core::convert::ImportConfig for FromXmlConfig {
84    fn default_type_name(&self) -> &str {
85        &self.default_type_name
86    }
87
88    fn version(&self) -> (u32, u32) {
89        self.version
90    }
91}
92
93/// Convert XML string to HEDL Document
94pub fn from_xml(xml: &str, config: &FromXmlConfig) -> Result<Document, String> {
95    // Pre-scan for DOCTYPE declarations if strict policy
96    if config.entity_policy == EntityPolicy::RejectDtd
97        && (xml.contains("<!DOCTYPE") || xml.contains("<!ENTITY"))
98    {
99        return Err("DOCTYPE declarations rejected by entity policy (XXE prevention)".to_string());
100    }
101
102    let mut reader = Reader::from_str(xml);
103    // Note: trim_text disabled to preserve whitespace around entity references
104    // In quick-xml 0.38+, entities like &amp; are separate Event::GeneralRef events
105    reader.config_mut().trim_text(false);
106
107    let mut doc = Document::new(config.version);
108
109    // Skip XML declaration and find root element
110    loop {
111        match reader.read_event() {
112            Ok(Event::DocType(e)) => {
113                if config.log_security_events {
114                    eprintln!(
115                        "[SECURITY] DTD detected in XML input at position {}: {:?}",
116                        reader.buffer_position(),
117                        String::from_utf8_lossy(&e)
118                    );
119                }
120
121                match config.entity_policy {
122                    EntityPolicy::RejectDtd => {
123                        return Err(format!(
124                            "DOCTYPE declaration rejected at position {} (XXE prevention policy)",
125                            reader.buffer_position()
126                        ));
127                    }
128                    EntityPolicy::WarnOnEntities => {
129                        eprintln!(
130                            "[WARNING] DOCTYPE detected in XML. External entities are NOT processed by quick-xml."
131                        );
132                    }
133                    EntityPolicy::AllowDtdNoExternal => {
134                        // Continue parsing, entities won't be resolved anyway
135                    }
136                }
137            }
138            Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
139                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
140
141                // Parse version from root if present
142                for attr in e.attributes().flatten() {
143                    let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
144                    let value = String::from_utf8_lossy(&attr.value).to_string();
145                    if key == "version" {
146                        if let Some((major, minor)) = parse_version(&value) {
147                            doc.version = (major, minor);
148                        }
149                    }
150                }
151
152                // Parse root content
153                doc.root = parse_children(&mut reader, &name, config, &mut doc.structs, 0)?;
154                break;
155            }
156            Ok(Event::Eof) => break,
157            Err(e) => {
158                return Err(format!(
159                    "XML parse error at position {}: {}",
160                    reader.buffer_position(),
161                    e
162                ))
163            }
164            _ => {}
165        }
166    }
167
168    Ok(doc)
169}
170
171fn parse_children(
172    reader: &mut Reader<&[u8]>,
173    parent_name: &str,
174    config: &FromXmlConfig,
175    structs: &mut BTreeMap<String, Vec<String>>,
176    depth: usize,
177) -> Result<BTreeMap<String, Item>, String> {
178    // Security: Prevent stack overflow via deep recursion
179    if depth > MAX_RECURSION_DEPTH {
180        return Err(format!(
181            "XML recursion depth exceeded (max: {})",
182            MAX_RECURSION_DEPTH
183        ));
184    }
185    let mut children = BTreeMap::new();
186    let mut element_counts: BTreeMap<String, Vec<Item>> = BTreeMap::new();
187
188    loop {
189        match reader.read_event() {
190            Ok(Event::Start(e)) => {
191                let raw_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
192                let name = to_hedl_key(&raw_name);
193                let elem_owned = e.to_owned();
194                let item = parse_element(reader, &elem_owned, config, depth + 1)?;
195
196                // Track repeated elements for list inference
197                if config.infer_lists {
198                    element_counts.entry(name.clone()).or_default().push(item);
199                } else {
200                    // ISSUE 2 FIX: Detect duplicate elements when infer_lists is false
201                    if children.contains_key(&name) {
202                        return Err(format!(
203                            "Duplicate element '{}' found with infer_lists=false. \
204                             Enable infer_lists to automatically collect duplicates into a list.",
205                            name
206                        ));
207                    }
208                    children.insert(name, item);
209                }
210            }
211            Ok(Event::Empty(e)) => {
212                let raw_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
213                let name = to_hedl_key(&raw_name);
214                let elem_owned = e.to_owned();
215                let item = parse_empty_element(&elem_owned, config)?;
216
217                if config.infer_lists {
218                    element_counts.entry(name.clone()).or_default().push(item);
219                } else {
220                    // ISSUE 2 FIX: Detect duplicate elements when infer_lists is false
221                    if children.contains_key(&name) {
222                        return Err(format!(
223                            "Duplicate element '{}' found with infer_lists=false. \
224                             Enable infer_lists to automatically collect duplicates into a list.",
225                            name
226                        ));
227                    }
228                    children.insert(name, item);
229                }
230            }
231            Ok(Event::End(e)) => {
232                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
233                if name == parent_name {
234                    break;
235                }
236            }
237            Ok(Event::Eof) => break,
238            Err(e) => return Err(format!("XML parse error: {}", e)),
239            _ => {}
240        }
241    }
242
243    // Process element counts to infer lists
244    if config.infer_lists {
245        for (name, items) in element_counts {
246            if items.len() > 1 {
247                // Multiple elements with same name - convert to list
248                let list = items_to_matrix_list(&name, items, config, structs)?;
249                children.insert(name, Item::List(list));
250            } else if let Some(item) = items.into_iter().next() {
251                children.insert(name, item);
252            }
253        }
254    }
255
256    Ok(children)
257}
258
259fn parse_element(
260    reader: &mut Reader<&[u8]>,
261    elem: &quick_xml::events::BytesStart<'_>,
262    config: &FromXmlConfig,
263    depth: usize,
264) -> Result<Item, String> {
265    // Security: Prevent stack overflow via deep recursion
266    if depth > MAX_RECURSION_DEPTH {
267        return Err(format!(
268            "XML recursion depth exceeded (max: {})",
269            MAX_RECURSION_DEPTH
270        ));
271    }
272    let name = String::from_utf8_lossy(elem.name().as_ref()).to_string();
273
274    // Extract attributes (convert keys to valid HEDL format)
275    let mut attributes = BTreeMap::new();
276    let mut is_reference = false;
277    for attr in elem.attributes().flatten() {
278        let raw_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
279        let value = String::from_utf8_lossy(&attr.value).to_string();
280
281        // Check for HEDL type marker (used to distinguish references from strings)
282        if raw_key == "__hedl_type__" {
283            if value == "ref" {
284                is_reference = true;
285            }
286            continue; // Don't include in regular attributes
287        }
288
289        let key = to_hedl_key(&raw_key);
290        attributes.insert(key, value);
291    }
292
293    // Parse content
294    let mut text_content = String::new();
295    let mut child_elements: BTreeMap<String, Vec<Item>> = BTreeMap::new();
296    let mut marked_children: BTreeMap<String, Vec<Item>> = BTreeMap::new(); // Elements with __hedl_child__
297    let mut has_children = false;
298
299    loop {
300        match reader.read_event() {
301            Ok(Event::Start(e)) => {
302                has_children = true;
303                let raw_child_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
304                let child_name = to_hedl_key(&raw_child_name);
305
306                // Check for __hedl_child__ marker attribute
307                let is_marked_child = e.attributes().any(|attr| {
308                    if let Ok(attr) = attr {
309                        let key = String::from_utf8_lossy(attr.key.as_ref());
310                        let val = String::from_utf8_lossy(&attr.value);
311                        key == "__hedl_child__" && val == "true"
312                    } else {
313                        false
314                    }
315                });
316
317                let elem_owned = e.to_owned();
318                let child_item = parse_element(reader, &elem_owned, config, depth + 1)?;
319
320                if is_marked_child {
321                    marked_children
322                        .entry(raw_child_name)
323                        .or_default()
324                        .push(child_item);
325                } else {
326                    child_elements
327                        .entry(child_name)
328                        .or_default()
329                        .push(child_item);
330                }
331            }
332            Ok(Event::Empty(e)) => {
333                has_children = true;
334                let raw_child_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
335                let child_name = to_hedl_key(&raw_child_name);
336
337                // Check for __hedl_child__ marker attribute
338                let is_marked_child = e.attributes().any(|attr| {
339                    if let Ok(attr) = attr {
340                        let key = String::from_utf8_lossy(attr.key.as_ref());
341                        let val = String::from_utf8_lossy(&attr.value);
342                        key == "__hedl_child__" && val == "true"
343                    } else {
344                        false
345                    }
346                });
347
348                let elem_owned = e.to_owned();
349                let child_item = parse_empty_element(&elem_owned, config)?;
350
351                if is_marked_child {
352                    marked_children
353                        .entry(raw_child_name)
354                        .or_default()
355                        .push(child_item);
356                } else {
357                    child_elements
358                        .entry(child_name)
359                        .or_default()
360                        .push(child_item);
361                }
362            }
363            Ok(Event::Text(e)) => {
364                let content = e
365                    .xml_content()
366                    .map_err(|e| format!("Text decode error: {}", e))?;
367                text_content.push_str(&content);
368            }
369            Ok(Event::GeneralRef(e)) => {
370                // Handle entity references (quick-xml 0.38+ reports these as separate events)
371                let ref_name = e.decode().map_err(|e| format!("Ref decode error: {}", e))?;
372                let unescaped = match ref_name.as_ref() {
373                    "amp" => "&",
374                    "lt" => "<",
375                    "gt" => ">",
376                    "quot" => "\"",
377                    "apos" => "'",
378                    _ => return Err(format!("Unknown entity reference: {}", ref_name)),
379                };
380                text_content.push_str(unescaped);
381            }
382            Ok(Event::End(e)) => {
383                let end_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
384                if end_name == name {
385                    break;
386                }
387            }
388            Ok(Event::Eof) => break,
389            Err(e) => return Err(format!("XML parse error: {}", e)),
390            _ => {}
391        }
392    }
393
394    // Determine item type
395    if has_children {
396        // Convert collected child elements, inferring lists for repeated elements
397        let mut result_children = BTreeMap::new();
398        for (child_name, items) in child_elements {
399            if items.len() > 1 {
400                if config.infer_lists {
401                    // Check if all items are scalars/tensors (->tensor) or objects (->matrix list)
402                    if child_name == "item" && items_are_tensor_elements(&items) {
403                        // Convert to tensor
404                        let tensor = items_to_tensor(&items)?;
405                        result_children
406                            .insert(child_name, Item::Scalar(Value::Tensor(Box::new(tensor))));
407                    } else {
408                        // Multiple elements with same name - convert to list
409                        let list =
410                            items_to_matrix_list(&child_name, items, config, &mut BTreeMap::new())?;
411                        result_children.insert(child_name, Item::List(list));
412                    }
413                } else {
414                    // ISSUE 2 FIX: Error when duplicates found with infer_lists=false
415                    return Err(format!(
416                        "Duplicate element '{}' found with infer_lists=false. \
417                         Enable infer_lists to automatically collect duplicates into a list.",
418                        child_name
419                    ));
420                }
421            } else if let Some(item) = items.into_iter().next() {
422                result_children.insert(child_name, item);
423            }
424        }
425
426        // Convert marked children (elements with __hedl_child__="true") to lists
427        // These represent NEST hierarchical children that should be attached to nodes
428        for (child_type_raw, child_items) in marked_children {
429            if !child_items.is_empty() {
430                // Convert to matrix list (even a single child becomes a list)
431                let list = items_to_matrix_list(
432                    &child_type_raw,
433                    child_items,
434                    config,
435                    &mut BTreeMap::new(),
436                )?;
437                let child_key = to_hedl_key(&child_type_raw);
438                result_children.insert(child_key, Item::List(list));
439            }
440        }
441
442        // ISSUE 1 FIX: Merge attributes into the result object
443        for (key, value_str) in attributes {
444            let value = parse_value_with_config(&value_str, config)?;
445            result_children.insert(key, Item::Scalar(value));
446        }
447
448        // Handle mixed content (text + children/attributes)
449        if !text_content.trim().is_empty() {
450            let value = if is_reference {
451                Value::Reference(parse_reference(text_content.trim())?)
452            } else {
453                parse_value_with_config(&text_content, config)?
454            };
455            result_children.insert("_text".to_string(), Item::Scalar(value));
456        }
457
458        // Check if we should flatten: if object has single child that's a list,
459        // and the child name is the singular of the parent name, promote the list.
460        // This handles XML patterns like <users><user>...</user><user>...</user></users>
461        // which should become users: @User[...] not users: { user: @User[...] }
462        // BUT: don't flatten if the list has hierarchical children (NEST structures)
463        // ALSO: don't flatten if we have attributes or text content
464        if result_children.len() == 1 {
465            let (child_key, child_item) = result_children.iter().next().unwrap();
466            if let Item::List(list) = child_item {
467                // Don't flatten if any rows have children (hierarchical nesting)
468                let has_nested_children = list
469                    .rows
470                    .iter()
471                    .any(|node| node.children().map(|c| !c.is_empty()).unwrap_or(false));
472                if !has_nested_children {
473                    // Check if child is singular form of parent
474                    // Compare case-insensitively because XML element names may have different casing
475                    // e.g., post_tags -> PostTag, but child element might be posttag -> Posttag
476                    let parent_singular =
477                        singularize_and_capitalize(&to_hedl_key(&name)).to_lowercase();
478                    let child_type = singularize_and_capitalize(child_key).to_lowercase();
479                    if parent_singular == child_type {
480                        // Flatten: return the list directly
481                        return Ok(result_children.into_values().next().unwrap());
482                    }
483                }
484            }
485        }
486
487        // Object with nested elements
488        Ok(Item::Object(result_children))
489    } else if !text_content.trim().is_empty() {
490        // Scalar with text content (and possibly attributes)
491        let value = if is_reference {
492            // Explicitly marked as reference
493            Value::Reference(parse_reference(text_content.trim())?)
494        } else {
495            parse_value_with_config(&text_content, config)?
496        };
497
498        // ISSUE 1 FIX: If we have both text and attributes, create an object
499        if !attributes.is_empty() {
500            let mut obj = BTreeMap::new();
501            obj.insert("_text".to_string(), Item::Scalar(value));
502            for (key, value_str) in attributes {
503                let attr_value = parse_value_with_config(&value_str, config)?;
504                obj.insert(key, Item::Scalar(attr_value));
505            }
506            Ok(Item::Object(obj))
507        } else {
508            Ok(Item::Scalar(value))
509        }
510    } else if !attributes.is_empty() {
511        // Empty element with attributes - convert to object
512        let mut obj = BTreeMap::new();
513        for (key, value_str) in attributes {
514            let value = parse_value_with_config(&value_str, config)?;
515            obj.insert(key, Item::Scalar(value));
516        }
517        Ok(Item::Object(obj))
518    } else {
519        // Empty element - null value
520        Ok(Item::Scalar(Value::Null))
521    }
522}
523
524fn parse_empty_element(
525    elem: &quick_xml::events::BytesStart<'_>,
526    config: &FromXmlConfig,
527) -> Result<Item, String> {
528    let mut attributes = BTreeMap::new();
529
530    for attr in elem.attributes().flatten() {
531        let raw_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
532        let key = to_hedl_key(&raw_key);
533        let value = String::from_utf8_lossy(&attr.value).to_string();
534        attributes.insert(key, value);
535    }
536
537    if attributes.is_empty() {
538        Ok(Item::Scalar(Value::Null))
539    } else if attributes.len() == 1 && attributes.contains_key("value") {
540        // Special case: <elem value="x"/> -> scalar x
541        let value_str = attributes.get("value").unwrap();
542        let value = parse_value_with_config(value_str, config)?;
543        Ok(Item::Scalar(value))
544    } else {
545        // Multiple attributes - convert to object
546        let mut obj = BTreeMap::new();
547        for (key, value_str) in attributes {
548            let value = parse_value_with_config(&value_str, config)?;
549            obj.insert(key, Item::Scalar(value));
550        }
551        Ok(Item::Object(obj))
552    }
553}
554
555fn parse_value_with_config(s: &str, config: &FromXmlConfig) -> Result<Value, String> {
556    let trimmed = s.trim();
557
558    // Detect entity references (&entity;)
559    if trimmed.contains('&') && trimmed.contains(';') {
560        if config.log_security_events {
561            eprintln!("[SECURITY] Entity reference detected in value: {}", trimmed);
562        }
563
564        // Check for potentially malicious entity patterns
565        if (trimmed.contains("&xxe;")
566            || trimmed.contains("&file;")
567            || trimmed.contains("&passwd;")
568            || trimmed.contains("&secret;"))
569            && config.entity_policy == EntityPolicy::WarnOnEntities
570        {
571            eprintln!(
572                "[WARNING] Suspicious entity reference detected: {}",
573                trimmed
574            );
575        }
576    }
577
578    if trimmed.is_empty() {
579        return Ok(Value::Null);
580    }
581
582    // Note: References are NOT auto-detected from @... pattern.
583    // They must be explicitly marked with __hedl_type__="ref" attribute.
584    // This prevents strings like "@not-a-ref" from being incorrectly parsed as references.
585
586    // Check for expression pattern $(...)
587    if trimmed.starts_with("$(") && trimmed.ends_with(')') {
588        let expr =
589            parse_expression_token(trimmed).map_err(|e| format!("Invalid expression: {}", e))?;
590        return Ok(Value::Expression(Box::new(expr)));
591    }
592
593    // Try parsing as boolean
594    if trimmed == "true" {
595        return Ok(Value::Bool(true));
596    }
597    if trimmed == "false" {
598        return Ok(Value::Bool(false));
599    }
600
601    // Try parsing as number
602    if let Ok(i) = trimmed.parse::<i64>() {
603        return Ok(Value::Int(i));
604    }
605    if let Ok(f) = trimmed.parse::<f64>() {
606        return Ok(Value::Float(f));
607    }
608
609    // Default to string
610    Ok(Value::String(trimmed.to_string().into()))
611}
612
613#[allow(dead_code)]
614fn parse_value(s: &str) -> Result<Value, String> {
615    // Legacy function for tests - uses default config
616    let config = FromXmlConfig::default();
617    parse_value_with_config(s, &config)
618}
619
/// Parse a `major.minor` version string.
///
/// Only the first two dot-separated components are read; any further
/// components are ignored. Returns `None` when fewer than two components are
/// present or when either of the first two is not a valid `u32`.
fn parse_version(s: &str) -> Option<(u32, u32)> {
    let mut components = s.split('.');
    let major = components.next()?.parse::<u32>().ok()?;
    let minor = components.next()?.parse::<u32>().ok()?;
    Some((major, minor))
}
630
631fn items_to_matrix_list(
632    name: &str,
633    items: Vec<Item>,
634    _config: &FromXmlConfig,
635    structs: &mut BTreeMap<String, Vec<String>>,
636) -> Result<MatrixList, String> {
637    // Infer type name from element name (singularize and capitalize)
638    let type_name = singularize_and_capitalize(name);
639
640    // Infer schema from first item
641    let schema = infer_schema(&items)?;
642
643    // Register the struct definition
644    structs.insert(type_name.clone(), schema.clone());
645
646    let mut rows = Vec::new();
647    for (idx, item) in items.into_iter().enumerate() {
648        let node = item_to_node(&type_name, &schema, item, idx)?;
649        rows.push(node);
650    }
651
652    Ok(MatrixList {
653        type_name,
654        schema,
655        rows,
656        count_hint: None,
657    })
658}
659
660fn infer_schema(items: &[Item]) -> Result<Vec<String>, String> {
661    if let Some(Item::Object(first_obj)) = items.first() {
662        // Only include scalar fields in the schema, not nested lists or child objects
663        let mut keys: Vec<_> = first_obj
664            .iter()
665            .filter(|(_, item)| matches!(item, Item::Scalar(_)))
666            .map(|(k, _)| k.clone())
667            .collect();
668        keys.sort();
669
670        // Ensure "id" is first if present
671        if let Some(pos) = keys.iter().position(|k| k == "id") {
672            keys.remove(pos);
673            keys.insert(0, "id".to_string());
674        } else {
675            // Add implicit id column
676            keys.insert(0, "id".to_string());
677        }
678
679        Ok(keys)
680    } else {
681        // Default schema
682        Ok(vec!["id".to_string(), "value".to_string()])
683    }
684}
685
686fn item_to_node(
687    type_name: &str,
688    schema: &[String],
689    item: Item,
690    idx: usize,
691) -> Result<Node, String> {
692    match item {
693        Item::Object(obj) => {
694            // Extract ID from object or generate one
695            let id = obj
696                .get(&schema[0])
697                .and_then(|i| i.as_scalar())
698                .and_then(|v| v.as_str())
699                .map(|s| s.to_string())
700                .unwrap_or_else(|| format!("{}", idx));
701
702            // Extract ALL field values (including ID) per SPEC
703            let mut fields = Vec::new();
704            for col in schema {
705                let value = obj
706                    .get(col)
707                    .and_then(|i| i.as_scalar())
708                    .cloned()
709                    .unwrap_or(Value::Null);
710                fields.push(value);
711            }
712
713            // Extract nested children (Item::List entries become child nodes)
714            let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
715            for child_item in obj.values() {
716                if let Item::List(child_list) = child_item {
717                    // Convert child list rows to nodes
718                    children.insert(child_list.type_name.clone(), child_list.rows.clone());
719                }
720            }
721
722            Ok(Node {
723                type_name: type_name.to_string(),
724                id,
725                fields: fields.into(),
726                children: if children.is_empty() {
727                    None
728                } else {
729                    Some(Box::new(children))
730                },
731                child_count: 0,
732            })
733        }
734        Item::Scalar(value) => {
735            // Single scalar - create node with ID value and scalar value
736            let id = format!("{}", idx);
737            Ok(Node {
738                type_name: type_name.to_string(),
739                id: id.clone(),
740                fields: vec![Value::String(id.into()), value].into(),
741                children: None,
742                child_count: 0,
743            })
744        }
745        Item::List(_) => Err("Cannot convert nested list to node".to_string()),
746    }
747}
748
/// Convert any string to a valid HEDL key (lowercase snake_case).
///
/// * An ASCII uppercase letter is lowercased and, unless it starts the input
///   or follows another uppercase letter or an underscore, preceded by `_`:
///   "Category" -> "category", "UserPost" -> "user_post", "XMLData" -> "xmldata".
/// * Any character other than ASCII letters, digits and `_` (namespace
///   colons, hyphens, dots, ...) becomes a single `_`:
///   "x:tag" -> "x_tag", "my-key" -> "my_key", "key.name" -> "key_name".
/// * Runs of underscores collapse to one; leading/trailing ones are removed.
/// * An empty result becomes "key"; a leading digit gets a `_` prefix.
fn to_hedl_key(s: &str) -> String {
    let mut key = String::with_capacity(s.len());
    let mut after_upper = false;

    for (pos, ch) in s.chars().enumerate() {
        match ch {
            'A'..='Z' => {
                // A new word starts here unless we are at the very beginning,
                // inside an acronym, or already separated by an underscore.
                let needs_separator = pos != 0 && !after_upper && !key.ends_with('_');
                if needs_separator {
                    key.push('_');
                }
                key.push(ch.to_ascii_lowercase());
                after_upper = true;
            }
            'a'..='z' | '0'..='9' => {
                key.push(ch);
                after_upper = false;
            }
            '_' => {
                // Collapse runs of underscores while building.
                if !key.ends_with('_') {
                    key.push('_');
                }
                after_upper = false;
            }
            _ => {
                // Invalid characters never open the key and never double up.
                if !(key.is_empty() || key.ends_with('_')) {
                    key.push('_');
                }
                after_upper = false;
            }
        }
    }

    let key = key.trim_matches('_');
    match key.bytes().next() {
        None => "key".to_string(),
        Some(first) if first.is_ascii_digit() => format!("_{}", key),
        Some(_) => key.to_string(),
    }
}
798
799/// Check if all items are suitable for tensor representation.
800/// Items must be numeric scalars or objects containing only a tensor at the "item" key.
801fn items_are_tensor_elements(items: &[Item]) -> bool {
802    items.iter().all(|item| {
803        match item {
804            // Direct numeric scalars
805            Item::Scalar(Value::Int(_)) => true,
806            Item::Scalar(Value::Float(_)) => true,
807            // Already-parsed tensors
808            Item::Scalar(Value::Tensor(_)) => true,
809            // Objects with single "item" key containing a tensor (nested arrays)
810            Item::Object(obj) if obj.len() == 1 => {
811                matches!(obj.get("item"), Some(Item::Scalar(Value::Tensor(_))))
812            }
813            _ => false,
814        }
815    })
816}
817
818/// Convert items to a tensor.
819fn items_to_tensor(items: &[Item]) -> Result<Tensor, String> {
820    let mut tensor_items = Vec::new();
821
822    for item in items {
823        let tensor = match item {
824            Item::Scalar(Value::Int(n)) => Tensor::Scalar(*n as f64),
825            Item::Scalar(Value::Float(f)) => Tensor::Scalar(*f),
826            Item::Scalar(Value::Tensor(t)) => (**t).clone(),
827            Item::Object(obj) if obj.len() == 1 => {
828                // Nested tensor element (object with only "item" key containing tensor)
829                if let Some(Item::Scalar(Value::Tensor(t))) = obj.get("item") {
830                    (**t).clone()
831                } else {
832                    return Err("Cannot convert non-numeric item to tensor".to_string());
833                }
834            }
835            _ => return Err("Cannot convert non-numeric item to tensor".to_string()),
836        };
837        tensor_items.push(tensor);
838    }
839
840    Ok(Tensor::Array(tensor_items))
841}
842
843#[cfg(test)]
844mod tests {
845    use super::*;
846
847    // ==================== FromXmlConfig tests ====================
848
849    #[test]
850    fn test_from_xml_config_default() {
851        let config = FromXmlConfig::default();
852        assert_eq!(config.default_type_name, "Item");
853        assert_eq!(config.version, (1, 0));
854        assert!(config.infer_lists);
855    }
856
857    #[test]
858    fn test_from_xml_config_debug() {
859        let config = FromXmlConfig::default();
860        let debug = format!("{:?}", config);
861        assert!(debug.contains("FromXmlConfig"));
862        assert!(debug.contains("default_type_name"));
863        assert!(debug.contains("version"));
864        assert!(debug.contains("infer_lists"));
865    }
866
867    #[test]
868    fn test_from_xml_config_clone() {
869        let config = FromXmlConfig {
870            default_type_name: "Custom".to_string(),
871            version: (2, 1),
872            infer_lists: false,
873            entity_policy: EntityPolicy::RejectDtd,
874            log_security_events: true,
875        };
876        let cloned = config.clone();
877        assert_eq!(cloned.default_type_name, "Custom");
878        assert_eq!(cloned.version, (2, 1));
879        assert!(!cloned.infer_lists);
880    }
881
882    #[test]
883    fn test_from_xml_config_custom() {
884        let config = FromXmlConfig {
885            default_type_name: "MyType".to_string(),
886            version: (3, 5),
887            infer_lists: false,
888            entity_policy: EntityPolicy::default(),
889            log_security_events: false,
890        };
891        assert_eq!(config.default_type_name, "MyType");
892        assert_eq!(config.version, (3, 5));
893        assert!(!config.infer_lists);
894    }
895
896    // ==================== parse_value tests ====================
897
898    #[test]
899    fn test_parse_value_empty() {
900        assert_eq!(parse_value("").unwrap(), Value::Null);
901        assert_eq!(parse_value("   ").unwrap(), Value::Null);
902    }
903
904    #[test]
905    fn test_parse_value_bool_true() {
906        assert_eq!(parse_value("true").unwrap(), Value::Bool(true));
907    }
908
909    #[test]
910    fn test_parse_value_bool_false() {
911        assert_eq!(parse_value("false").unwrap(), Value::Bool(false));
912    }
913
914    #[test]
915    fn test_parse_value_int_positive() {
916        assert_eq!(parse_value("42").unwrap(), Value::Int(42));
917    }
918
919    #[test]
920    fn test_parse_value_int_negative() {
921        assert_eq!(parse_value("-100").unwrap(), Value::Int(-100));
922    }
923
924    #[test]
925    fn test_parse_value_int_zero() {
926        assert_eq!(parse_value("0").unwrap(), Value::Int(0));
927    }
928
929    #[test]
930    fn test_parse_value_float_simple() {
931        if let Value::Float(f) = parse_value("3.5").unwrap() {
932            assert!((f - 3.5).abs() < 0.001);
933        } else {
934            panic!("Expected float");
935        }
936    }
937
938    #[test]
939    fn test_parse_value_float_negative() {
940        if let Value::Float(f) = parse_value("-2.5").unwrap() {
941            assert!((f + 2.5).abs() < 0.001);
942        } else {
943            panic!("Expected float");
944        }
945    }
946
947    #[test]
948    fn test_parse_value_string() {
949        assert_eq!(
950            parse_value("hello").unwrap(),
951            Value::String("hello".to_string().into())
952        );
953    }
954
955    #[test]
956    fn test_parse_value_string_with_spaces() {
957        assert_eq!(
958            parse_value("  hello world  ").unwrap(),
959            Value::String("hello world".to_string().into())
960        );
961    }
962
963    #[test]
964    fn test_parse_value_expression_identifier() {
965        if let Value::Expression(e) = parse_value("$(foo)").unwrap() {
966            assert_eq!(e.to_string(), "foo");
967        } else {
968            panic!("Expected expression");
969        }
970    }
971
972    #[test]
973    fn test_parse_value_expression_call() {
974        if let Value::Expression(e) = parse_value("$(add(x, 1))").unwrap() {
975            assert_eq!(e.to_string(), "add(x, 1)");
976        } else {
977            panic!("Expected expression");
978        }
979    }
980
981    #[test]
982    fn test_parse_value_at_string_not_reference() {
983        // Strings starting with @ are just strings, not references
984        if let Value::String(s) = parse_value("@not-a-ref").unwrap() {
985            assert_eq!(s.as_ref(), "@not-a-ref");
986        } else {
987            panic!("Expected string");
988        }
989    }
990
991    // ==================== parse_reference tests ====================
992
993    #[test]
994    fn test_parse_reference_local() {
995        let ref_val = parse_reference("@user123").unwrap();
996        assert_eq!(ref_val.type_name, None);
997        assert_eq!(ref_val.id.as_ref(), "user123");
998    }
999
1000    #[test]
1001    fn test_parse_reference_qualified() {
1002        let ref_val = parse_reference("@User:123").unwrap();
1003        assert_eq!(ref_val.type_name.as_deref(), Some("User"));
1004        assert_eq!(ref_val.id.as_ref(), "123");
1005    }
1006
1007    #[test]
1008    fn test_parse_reference_with_special_chars() {
1009        let ref_val = parse_reference("@my-item_123").unwrap();
1010        assert_eq!(ref_val.type_name, None);
1011        assert_eq!(ref_val.id.as_ref(), "my-item_123");
1012    }
1013
1014    #[test]
1015    fn test_parse_reference_invalid_no_at() {
1016        let result = parse_reference("user123");
1017        assert!(result.is_err());
1018        assert!(result.unwrap_err().contains("Invalid reference format"));
1019    }
1020
1021    // ==================== parse_version tests ====================
1022
1023    #[test]
1024    fn test_parse_version_valid() {
1025        assert_eq!(parse_version("1.0"), Some((1, 0)));
1026        assert_eq!(parse_version("2.5"), Some((2, 5)));
1027        assert_eq!(parse_version("10.20"), Some((10, 20)));
1028    }
1029
1030    #[test]
1031    fn test_parse_version_with_patch() {
1032        // Only major.minor are taken
1033        assert_eq!(parse_version("1.2.3"), Some((1, 2)));
1034    }
1035
1036    #[test]
1037    fn test_parse_version_invalid() {
1038        assert_eq!(parse_version("invalid"), None);
1039        assert_eq!(parse_version("1"), None);
1040        assert_eq!(parse_version(""), None);
1041        assert_eq!(parse_version("a.b"), None);
1042    }
1043
1044    // ==================== to_hedl_key tests ====================
1045
1046    #[test]
1047    fn test_to_hedl_key_pascal_case() {
1048        assert_eq!(to_hedl_key("Category"), "category");
1049        assert_eq!(to_hedl_key("UserPost"), "user_post");
1050        assert_eq!(to_hedl_key("UserProfileSettings"), "user_profile_settings");
1051    }
1052
1053    #[test]
1054    fn test_to_hedl_key_acronyms() {
1055        assert_eq!(to_hedl_key("XMLData"), "xmldata");
1056        assert_eq!(to_hedl_key("HTTPResponse"), "httpresponse");
1057    }
1058
1059    #[test]
1060    fn test_to_hedl_key_lowercase() {
1061        assert_eq!(to_hedl_key("users"), "users");
1062        assert_eq!(to_hedl_key("category"), "category");
1063    }
1064
1065    #[test]
1066    fn test_to_hedl_key_mixed() {
1067        assert_eq!(to_hedl_key("someXMLData"), "some_xmldata");
1068        assert_eq!(to_hedl_key("getHTTPResponse"), "get_httpresponse");
1069    }
1070
1071    #[test]
1072    fn test_to_hedl_key_with_underscores() {
1073        assert_eq!(to_hedl_key("user_name"), "user_name");
1074        assert_eq!(to_hedl_key("_private"), "private");
1075    }
1076
1077    // ==================== Issue 3 tests: Namespace and invalid character sanitization ====================
1078
1079    #[test]
1080    fn test_issue3_namespace_colon() {
1081        // Namespaced tags like "x:tag" should normalize to "x_tag"
1082        assert_eq!(to_hedl_key("x:tag"), "x_tag");
1083        assert_eq!(to_hedl_key("ns:element"), "ns_element");
1084        assert_eq!(to_hedl_key("xml:lang"), "xml_lang");
1085    }
1086
1087    #[test]
1088    fn test_issue3_hyphens() {
1089        // Hyphens should convert to underscores
1090        assert_eq!(to_hedl_key("my-key"), "my_key");
1091        assert_eq!(to_hedl_key("multi-word-key"), "multi_word_key");
1092    }
1093
1094    #[test]
1095    fn test_issue3_dots() {
1096        // Dots should convert to underscores
1097        assert_eq!(to_hedl_key("key.name"), "key_name");
1098        assert_eq!(to_hedl_key("config.value"), "config_value");
1099    }
1100
1101    #[test]
1102    fn test_issue3_multiple_special_chars() {
1103        // Multiple invalid characters
1104        assert_eq!(to_hedl_key("my:key-name.value"), "my_key_name_value");
1105        assert_eq!(to_hedl_key("x:some-tag.attr"), "x_some_tag_attr");
1106    }
1107
1108    #[test]
1109    fn test_issue3_leading_digit() {
1110        // Keys starting with digit get underscore prefix
1111        assert_eq!(to_hedl_key("123key"), "_123key");
1112        assert_eq!(to_hedl_key("9item"), "_9item");
1113    }
1114
1115    #[test]
1116    fn test_issue3_empty_or_invalid_only() {
1117        // Empty or only invalid characters should return "key"
1118        assert_eq!(to_hedl_key(""), "key");
1119        assert_eq!(to_hedl_key(":::"), "key");
1120        assert_eq!(to_hedl_key("---"), "key");
1121    }
1122
1123    #[test]
1124    fn test_issue3_namespace_with_pascal_case() {
1125        // Combination of namespace and pascal case
1126        assert_eq!(to_hedl_key("ns:UserName"), "ns_user_name");
1127        assert_eq!(to_hedl_key("xml:HTTPRequest"), "xml_httprequest");
1128    }
1129
1130    #[test]
1131    fn test_issue3_xml_integration() {
1132        let xml = r#"<?xml version="1.0"?>
1133        <hedl>
1134            <x:tag>value1</x:tag>
1135            <my-attr>value2</my-attr>
1136            <config.item>value3</config.item>
1137        </hedl>"#;
1138
1139        let config = FromXmlConfig::default();
1140        let doc = from_xml(xml, &config).unwrap();
1141
1142        // All keys should be normalized
1143        assert!(doc.root.contains_key("x_tag"));
1144        assert!(doc.root.contains_key("my_attr"));
1145        assert!(doc.root.contains_key("config_item"));
1146    }
1147
1148    #[test]
1149    fn test_issue3_no_collision_different_separators() {
1150        // Different separators should produce the same normalized key
1151        // This tests that the normalization is consistent
1152        assert_eq!(to_hedl_key("my:key"), to_hedl_key("my-key"));
1153        assert_eq!(to_hedl_key("my.key"), to_hedl_key("my_key"));
1154    }
1155
1156    // ==================== items_are_tensor_elements tests ====================
1157
1158    #[test]
1159    fn test_items_are_tensor_elements_int_scalars() {
1160        let items = vec![
1161            Item::Scalar(Value::Int(1)),
1162            Item::Scalar(Value::Int(2)),
1163            Item::Scalar(Value::Int(3)),
1164        ];
1165        assert!(items_are_tensor_elements(&items));
1166    }
1167
1168    #[test]
1169    fn test_items_are_tensor_elements_float_scalars() {
1170        let items = vec![
1171            Item::Scalar(Value::Float(1.0)),
1172            Item::Scalar(Value::Float(2.0)),
1173        ];
1174        assert!(items_are_tensor_elements(&items));
1175    }
1176
1177    #[test]
1178    fn test_items_are_tensor_elements_tensors() {
1179        let items = vec![
1180            Item::Scalar(Value::Tensor(Box::new(Tensor::Scalar(1.0)))),
1181            Item::Scalar(Value::Tensor(Box::new(Tensor::Scalar(2.0)))),
1182        ];
1183        assert!(items_are_tensor_elements(&items));
1184    }
1185
1186    #[test]
1187    fn test_items_are_tensor_elements_mixed_numeric() {
1188        let items = vec![Item::Scalar(Value::Int(1)), Item::Scalar(Value::Float(2.0))];
1189        assert!(items_are_tensor_elements(&items));
1190    }
1191
1192    #[test]
1193    fn test_items_are_tensor_elements_with_strings() {
1194        let items = vec![
1195            Item::Scalar(Value::Int(1)),
1196            Item::Scalar(Value::String("hello".to_string().into())),
1197        ];
1198        assert!(!items_are_tensor_elements(&items));
1199    }
1200
1201    #[test]
1202    fn test_items_are_tensor_elements_empty() {
1203        let items: Vec<Item> = vec![];
1204        assert!(items_are_tensor_elements(&items));
1205    }
1206
1207    // ==================== items_to_tensor tests ====================
1208
1209    #[test]
1210    fn test_items_to_tensor_int_scalars() {
1211        let items = vec![
1212            Item::Scalar(Value::Int(1)),
1213            Item::Scalar(Value::Int(2)),
1214            Item::Scalar(Value::Int(3)),
1215        ];
1216        let tensor = items_to_tensor(&items).unwrap();
1217        if let Tensor::Array(arr) = tensor {
1218            assert_eq!(arr.len(), 3);
1219            assert_eq!(arr[0], Tensor::Scalar(1.0));
1220        } else {
1221            panic!("Expected array");
1222        }
1223    }
1224
1225    #[test]
1226    fn test_items_to_tensor_float_scalars() {
1227        let items = vec![
1228            Item::Scalar(Value::Float(1.5)),
1229            Item::Scalar(Value::Float(2.5)),
1230        ];
1231        let tensor = items_to_tensor(&items).unwrap();
1232        if let Tensor::Array(arr) = tensor {
1233            assert_eq!(arr.len(), 2);
1234            assert_eq!(arr[0], Tensor::Scalar(1.5));
1235        } else {
1236            panic!("Expected array");
1237        }
1238    }
1239
1240    #[test]
1241    fn test_items_to_tensor_invalid() {
1242        let items = vec![Item::Scalar(Value::String("hello".to_string().into()))];
1243        let result = items_to_tensor(&items);
1244        assert!(result.is_err());
1245    }
1246
1247    // ==================== from_xml basic tests ====================
1248
1249    #[test]
1250    fn test_empty_document() {
1251        let xml = r#"<?xml version="1.0" encoding="UTF-8"?><hedl></hedl>"#;
1252        let config = FromXmlConfig::default();
1253        let doc = from_xml(xml, &config).unwrap();
1254        assert_eq!(doc.root.len(), 0);
1255    }
1256
1257    #[test]
1258    fn test_empty_document_self_closing() {
1259        let xml = r#"<?xml version="1.0" encoding="UTF-8"?><hedl/>"#;
1260        let config = FromXmlConfig::default();
1261        let doc = from_xml(xml, &config).unwrap();
1262        assert_eq!(doc.root.len(), 0);
1263    }
1264
1265    #[test]
1266    fn test_scalar_bool_true() {
1267        let xml = r#"<?xml version="1.0"?><hedl><val>true</val></hedl>"#;
1268        let config = FromXmlConfig::default();
1269        let doc = from_xml(xml, &config).unwrap();
1270        assert_eq!(
1271            doc.root.get("val").and_then(|i| i.as_scalar()),
1272            Some(&Value::Bool(true))
1273        );
1274    }
1275
1276    #[test]
1277    fn test_scalar_bool_false() {
1278        let xml = r#"<?xml version="1.0"?><hedl><val>false</val></hedl>"#;
1279        let config = FromXmlConfig::default();
1280        let doc = from_xml(xml, &config).unwrap();
1281        assert_eq!(
1282            doc.root.get("val").and_then(|i| i.as_scalar()),
1283            Some(&Value::Bool(false))
1284        );
1285    }
1286
1287    #[test]
1288    fn test_scalar_int() {
1289        let xml = r#"<?xml version="1.0"?><hedl><val>42</val></hedl>"#;
1290        let config = FromXmlConfig::default();
1291        let doc = from_xml(xml, &config).unwrap();
1292        assert_eq!(
1293            doc.root.get("val").and_then(|i| i.as_scalar()),
1294            Some(&Value::Int(42))
1295        );
1296    }
1297
1298    #[test]
1299    fn test_scalar_float() {
1300        let xml = r#"<?xml version="1.0"?><hedl><val>3.5</val></hedl>"#;
1301        let config = FromXmlConfig::default();
1302        let doc = from_xml(xml, &config).unwrap();
1303        if let Some(Item::Scalar(Value::Float(f))) = doc.root.get("val") {
1304            assert!((f - 3.5).abs() < 0.001);
1305        } else {
1306            panic!("Expected float");
1307        }
1308    }
1309
1310    #[test]
1311    fn test_scalar_string() {
1312        let xml = r#"<?xml version="1.0"?><hedl><val>hello</val></hedl>"#;
1313        let config = FromXmlConfig::default();
1314        let doc = from_xml(xml, &config).unwrap();
1315        assert_eq!(
1316            doc.root.get("val").and_then(|i| i.as_scalar()),
1317            Some(&Value::String("hello".to_string().into()))
1318        );
1319    }
1320
1321    #[test]
1322    fn test_scalar_null_empty_element() {
1323        let xml = r#"<?xml version="1.0"?><hedl><val></val></hedl>"#;
1324        let config = FromXmlConfig::default();
1325        let doc = from_xml(xml, &config).unwrap();
1326        assert_eq!(
1327            doc.root.get("val").and_then(|i| i.as_scalar()),
1328            Some(&Value::Null)
1329        );
1330    }
1331
1332    #[test]
1333    fn test_scalar_expression() {
1334        let xml = r#"<?xml version="1.0"?><hedl><val>$(foo)</val></hedl>"#;
1335        let config = FromXmlConfig::default();
1336        let doc = from_xml(xml, &config).unwrap();
1337        if let Some(Item::Scalar(Value::Expression(e))) = doc.root.get("val") {
1338            assert_eq!(e.to_string(), "foo");
1339        } else {
1340            panic!("Expected expression");
1341        }
1342    }
1343
1344    // ==================== Nested object tests ====================
1345
1346    #[test]
1347    fn test_nested_object() {
1348        let xml = r#"<?xml version="1.0"?>
1349        <hedl>
1350            <config>
1351                <name>test</name>
1352                <value>100</value>
1353            </config>
1354        </hedl>"#;
1355
1356        let config = FromXmlConfig::default();
1357        let doc = from_xml(xml, &config).unwrap();
1358
1359        let config_item = doc.root.get("config").unwrap();
1360        assert!(config_item.as_object().is_some());
1361
1362        if let Item::Object(obj) = config_item {
1363            assert!(obj.contains_key("name"));
1364            assert!(obj.contains_key("value"));
1365        }
1366    }
1367
1368    #[test]
1369    fn test_deeply_nested_object() {
1370        let xml = r#"<?xml version="1.0"?>
1371        <hedl>
1372            <outer>
1373                <inner>
1374                    <deep>42</deep>
1375                </inner>
1376            </outer>
1377        </hedl>"#;
1378
1379        let config = FromXmlConfig::default();
1380        let doc = from_xml(xml, &config).unwrap();
1381
1382        if let Some(Item::Object(outer)) = doc.root.get("outer") {
1383            if let Some(Item::Object(inner)) = outer.get("inner") {
1384                if let Some(Item::Scalar(Value::Int(n))) = inner.get("deep") {
1385                    assert_eq!(*n, 42);
1386                } else {
1387                    panic!("Expected int");
1388                }
1389            } else {
1390                panic!("Expected inner object");
1391            }
1392        } else {
1393            panic!("Expected outer object");
1394        }
1395    }
1396
1397    // ==================== List inference tests ====================
1398
1399    #[test]
1400    fn test_infer_list_repeated_elements() {
1401        let xml = r#"<?xml version="1.0"?>
1402        <hedl>
1403            <user id="1"><name>Alice</name></user>
1404            <user id="2"><name>Bob</name></user>
1405        </hedl>"#;
1406
1407        let config = FromXmlConfig {
1408            infer_lists: true,
1409            ..Default::default()
1410        };
1411        let doc = from_xml(xml, &config).unwrap();
1412
1413        if let Some(Item::List(list)) = doc.root.get("user") {
1414            assert_eq!(list.rows.len(), 2);
1415        } else {
1416            panic!("Expected list");
1417        }
1418    }
1419
1420    #[test]
1421    fn test_no_infer_list_single_element() {
1422        let xml = r#"<?xml version="1.0"?>
1423        <hedl>
1424            <user id="1"><name>Alice</name></user>
1425        </hedl>"#;
1426
1427        let config = FromXmlConfig {
1428            infer_lists: true,
1429            ..Default::default()
1430        };
1431        let doc = from_xml(xml, &config).unwrap();
1432
1433        // Single element should remain as object
1434        assert!(doc.root.get("user").and_then(|i| i.as_object()).is_some());
1435    }
1436
1437    #[test]
1438    fn test_infer_list_disabled() {
1439        let xml = r#"<?xml version="1.0"?>
1440        <hedl>
1441            <user id="1"><name>Alice</name></user>
1442            <user id="2"><name>Bob</name></user>
1443        </hedl>"#;
1444
1445        let config = FromXmlConfig {
1446            infer_lists: false,
1447            ..Default::default()
1448        };
1449        let result = from_xml(xml, &config);
1450
1451        // UPDATED for Issue 2 fix: With infer_lists disabled, duplicates now error
1452        assert!(result.is_err());
1453        let err = result.unwrap_err();
1454        assert!(err.contains("Duplicate element"));
1455    }
1456
1457    // ==================== Attribute parsing tests ====================
1458
1459    #[test]
1460    fn test_attributes_to_object() {
1461        let xml = r#"<?xml version="1.0"?>
1462        <hedl>
1463            <item id="123" name="test" active="true"/>
1464        </hedl>"#;
1465
1466        let config = FromXmlConfig::default();
1467        let doc = from_xml(xml, &config).unwrap();
1468
1469        if let Some(Item::Object(obj)) = doc.root.get("item") {
1470            assert_eq!(
1471                obj.get("id").and_then(|i| i.as_scalar()),
1472                Some(&Value::Int(123))
1473            );
1474            assert_eq!(
1475                obj.get("name").and_then(|i| i.as_scalar()),
1476                Some(&Value::String("test".to_string().into()))
1477            );
1478            assert_eq!(
1479                obj.get("active").and_then(|i| i.as_scalar()),
1480                Some(&Value::Bool(true))
1481            );
1482        } else {
1483            panic!("Expected object");
1484        }
1485    }
1486
1487    #[test]
1488    fn test_single_value_attribute() {
1489        let xml = r#"<?xml version="1.0"?>
1490        <hedl>
1491            <item value="42"/>
1492        </hedl>"#;
1493
1494        let config = FromXmlConfig::default();
1495        let doc = from_xml(xml, &config).unwrap();
1496
1497        assert_eq!(
1498            doc.root.get("item").and_then(|i| i.as_scalar()),
1499            Some(&Value::Int(42))
1500        );
1501    }
1502
1503    // ==================== Version parsing from root ====================
1504
1505    #[test]
1506    fn test_version_from_root_attribute() {
1507        let xml = r#"<?xml version="1.0"?><hedl version="2.5"></hedl>"#;
1508        let config = FromXmlConfig::default();
1509        let doc = from_xml(xml, &config).unwrap();
1510        assert_eq!(doc.version, (2, 5));
1511    }
1512
1513    #[test]
1514    fn test_version_default() {
1515        let xml = r#"<?xml version="1.0"?><hedl></hedl>"#;
1516        let config = FromXmlConfig {
1517            version: (3, 1),
1518            ..Default::default()
1519        };
1520        let doc = from_xml(xml, &config).unwrap();
1521        assert_eq!(doc.version, (3, 1));
1522    }
1523
1524    // ==================== Reference with marker attribute ====================
1525
1526    #[test]
1527    fn test_reference_with_marker() {
1528        let xml = r#"<?xml version="1.0"?>
1529        <hedl>
1530            <ref __hedl_type__="ref">@user123</ref>
1531        </hedl>"#;
1532
1533        let config = FromXmlConfig::default();
1534        let doc = from_xml(xml, &config).unwrap();
1535
1536        if let Some(Item::Scalar(Value::Reference(r))) = doc.root.get("ref") {
1537            assert_eq!(r.id.as_ref(), "user123");
1538        } else {
1539            panic!("Expected reference");
1540        }
1541    }
1542
1543    #[test]
1544    fn test_qualified_reference_with_marker() {
1545        let xml = r#"<?xml version="1.0"?>
1546        <hedl>
1547            <ref __hedl_type__="ref">@User:456</ref>
1548        </hedl>"#;
1549
1550        let config = FromXmlConfig::default();
1551        let doc = from_xml(xml, &config).unwrap();
1552
1553        if let Some(Item::Scalar(Value::Reference(r))) = doc.root.get("ref") {
1554            assert_eq!(r.type_name.as_deref(), Some("User"));
1555            assert_eq!(r.id.as_ref(), "456");
1556        } else {
1557            panic!("Expected reference");
1558        }
1559    }
1560
    // ==================== Degenerate input (no root element; parses to an empty document) ====================
1562
1563    #[test]
1564    fn test_empty_input() {
1565        // Empty input should produce an empty document
1566        let xml = "";
1567        let config = FromXmlConfig::default();
1568        let doc = from_xml(xml, &config).unwrap();
1569        assert!(doc.root.is_empty());
1570    }
1571
1572    #[test]
1573    fn test_only_declaration() {
1574        // Only XML declaration should produce an empty document
1575        let xml = r#"<?xml version="1.0"?>"#;
1576        let config = FromXmlConfig::default();
1577        let doc = from_xml(xml, &config).unwrap();
1578        assert!(doc.root.is_empty());
1579    }
1580
1581    // ==================== Edge cases ====================
1582
1583    #[test]
1584    fn test_unicode_content() {
1585        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1586        <hedl>
1587            <name>héllo 世界</name>
1588        </hedl>"#;
1589
1590        let config = FromXmlConfig::default();
1591        let doc = from_xml(xml, &config).unwrap();
1592
1593        assert_eq!(
1594            doc.root.get("name").and_then(|i| i.as_scalar()),
1595            Some(&Value::String("héllo 世界".to_string().into()))
1596        );
1597    }
1598
1599    #[test]
1600    fn test_whitespace_handling() {
1601        let xml = r#"<?xml version="1.0"?>
1602        <hedl>
1603            <val>   hello world   </val>
1604        </hedl>"#;
1605
1606        let config = FromXmlConfig::default();
1607        let doc = from_xml(xml, &config).unwrap();
1608
1609        // Whitespace should be trimmed
1610        assert_eq!(
1611            doc.root.get("val").and_then(|i| i.as_scalar()),
1612            Some(&Value::String("hello world".to_string().into()))
1613        );
1614    }
1615
1616    #[test]
1617    fn test_cdata_content() {
1618        let xml = r#"<?xml version="1.0"?>
1619        <hedl>
1620            <text><![CDATA[<not>xml</not>]]></text>
1621        </hedl>"#;
1622
1623        let config = FromXmlConfig::default();
1624        let doc = from_xml(xml, &config).unwrap();
1625
1626        // CDATA content should be preserved
1627        assert!(doc.root.contains_key("text"));
1628    }
1629
1630    #[test]
1631    fn test_key_conversion_from_pascal_case() {
1632        let xml = r#"<?xml version="1.0"?>
1633        <hedl>
1634            <UserName>test</UserName>
1635        </hedl>"#;
1636
1637        let config = FromXmlConfig::default();
1638        let doc = from_xml(xml, &config).unwrap();
1639
1640        // UserName should be converted to user_name
1641        assert!(doc.root.contains_key("user_name"));
1642    }
1643
1644    // ==================== Issue 1 tests: Attributes preserved ====================
1645
1646    #[test]
1647    fn test_issue1_attributes_with_child_elements() {
1648        let xml = r#"<?xml version="1.0"?>
1649        <hedl>
1650            <item id="1"><name>A</name></item>
1651        </hedl>"#;
1652
1653        let config = FromXmlConfig::default();
1654        let doc = from_xml(xml, &config).unwrap();
1655
1656        if let Some(Item::Object(obj)) = doc.root.get("item") {
1657            // Should have both id attribute and name child
1658            assert_eq!(
1659                obj.get("id").and_then(|i| i.as_scalar()),
1660                Some(&Value::Int(1))
1661            );
1662            assert_eq!(
1663                obj.get("name").and_then(|i| i.as_scalar()),
1664                Some(&Value::String("A".to_string().into()))
1665            );
1666        } else {
1667            panic!("Expected object with both id and name");
1668        }
1669    }
1670
1671    #[test]
1672    fn test_issue1_attributes_with_text_content() {
1673        let xml = r#"<?xml version="1.0"?>
1674        <hedl>
1675            <item id="2" type="primary">Content text</item>
1676        </hedl>"#;
1677
1678        let config = FromXmlConfig::default();
1679        let doc = from_xml(xml, &config).unwrap();
1680
1681        if let Some(Item::Object(obj)) = doc.root.get("item") {
1682            // Should have id, type attributes and _text for content
1683            assert_eq!(
1684                obj.get("id").and_then(|i| i.as_scalar()),
1685                Some(&Value::Int(2))
1686            );
1687            assert_eq!(
1688                obj.get("type").and_then(|i| i.as_scalar()),
1689                Some(&Value::String("primary".to_string().into()))
1690            );
1691            assert_eq!(
1692                obj.get("_text").and_then(|i| i.as_scalar()),
1693                Some(&Value::String("Content text".to_string().into()))
1694            );
1695        } else {
1696            panic!("Expected object with id, type and _text");
1697        }
1698    }
1699
1700    #[test]
1701    fn test_issue1_attributes_with_both_children_and_text() {
1702        let xml = r#"<?xml version="1.0"?>
1703        <hedl>
1704            <item id="3" status="active">
1705                <name>Item 3</name>
1706                Some text content
1707            </item>
1708        </hedl>"#;
1709
1710        let config = FromXmlConfig::default();
1711        let doc = from_xml(xml, &config).unwrap();
1712
1713        if let Some(Item::Object(obj)) = doc.root.get("item") {
1714            // Should have id, status attributes, name child, and _text
1715            assert_eq!(
1716                obj.get("id").and_then(|i| i.as_scalar()),
1717                Some(&Value::Int(3))
1718            );
1719            assert_eq!(
1720                obj.get("status").and_then(|i| i.as_scalar()),
1721                Some(&Value::String("active".to_string().into()))
1722            );
1723            assert_eq!(
1724                obj.get("name").and_then(|i| i.as_scalar()),
1725                Some(&Value::String("Item 3".to_string().into()))
1726            );
1727            assert!(obj.contains_key("_text"));
1728        } else {
1729            panic!("Expected object with attributes, children and text");
1730        }
1731    }
1732
1733    #[test]
1734    fn test_issue1_multiple_attributes_preserved() {
1735        let xml = r#"<?xml version="1.0"?>
1736        <hedl>
1737            <product id="100" name="Widget" price="19.99" available="true">
1738                <description>A useful widget</description>
1739            </product>
1740        </hedl>"#;
1741
1742        let config = FromXmlConfig::default();
1743        let doc = from_xml(xml, &config).unwrap();
1744
1745        if let Some(Item::Object(obj)) = doc.root.get("product") {
1746            assert_eq!(
1747                obj.get("id").and_then(|i| i.as_scalar()),
1748                Some(&Value::Int(100))
1749            );
1750            assert_eq!(
1751                obj.get("name").and_then(|i| i.as_scalar()),
1752                Some(&Value::String("Widget".to_string().into()))
1753            );
1754            if let Some(Item::Scalar(Value::Float(f))) = obj.get("price") {
1755                assert!((f - 19.99).abs() < 0.001);
1756            } else {
1757                panic!("Expected price float");
1758            }
1759            assert_eq!(
1760                obj.get("available").and_then(|i| i.as_scalar()),
1761                Some(&Value::Bool(true))
1762            );
1763            assert_eq!(
1764                obj.get("description").and_then(|i| i.as_scalar()),
1765                Some(&Value::String("A useful widget".to_string().into()))
1766            );
1767        } else {
1768            panic!("Expected object with all attributes and description");
1769        }
1770    }
1771
1772    // ==================== Issue 2 tests: Duplicate elements handling ====================
1773
1774    #[test]
1775    fn test_issue2_duplicate_elements_with_infer_lists_false() {
1776        let xml = r#"<?xml version="1.0"?>
1777        <hedl>
1778            <item>First</item>
1779            <item>Second</item>
1780        </hedl>"#;
1781
1782        let config = FromXmlConfig {
1783            infer_lists: false,
1784            ..Default::default()
1785        };
1786        let result = from_xml(xml, &config);
1787
1788        // Should error when duplicates found with infer_lists=false
1789        assert!(result.is_err());
1790        let err = result.unwrap_err();
1791        assert!(err.contains("Duplicate element"));
1792        assert!(err.contains("infer_lists=false"));
1793    }
1794
1795    #[test]
1796    fn test_issue2_duplicate_elements_with_infer_lists_true() {
1797        let xml = r#"<?xml version="1.0"?>
1798        <hedl>
1799            <item>First</item>
1800            <item>Second</item>
1801            <item>Third</item>
1802        </hedl>"#;
1803
1804        let config = FromXmlConfig {
1805            infer_lists: true,
1806            ..Default::default()
1807        };
1808        let doc = from_xml(xml, &config).unwrap();
1809
1810        // Should create a list when duplicates found with infer_lists=true
1811        if let Some(Item::List(list)) = doc.root.get("item") {
1812            assert_eq!(list.rows.len(), 3);
1813        } else {
1814            panic!("Expected list with 3 items");
1815        }
1816    }
1817
1818    #[test]
1819    fn test_issue2_no_error_for_unique_elements_with_infer_lists_false() {
1820        let xml = r#"<?xml version="1.0"?>
1821        <hedl>
1822            <first>1</first>
1823            <second>2</second>
1824            <third>3</third>
1825        </hedl>"#;
1826
1827        let config = FromXmlConfig {
1828            infer_lists: false,
1829            ..Default::default()
1830        };
1831        let doc = from_xml(xml, &config).unwrap();
1832
1833        // Should succeed when all elements are unique
1834        assert_eq!(doc.root.len(), 3);
1835        assert!(doc.root.contains_key("first"));
1836        assert!(doc.root.contains_key("second"));
1837        assert!(doc.root.contains_key("third"));
1838    }
1839
1840    #[test]
1841    fn test_issue2_duplicate_nested_elements_with_infer_lists_false() {
1842        let xml = r#"<?xml version="1.0"?>
1843        <hedl>
1844            <parent>
1845                <child>First</child>
1846                <child>Second</child>
1847            </parent>
1848        </hedl>"#;
1849
1850        let config = FromXmlConfig {
1851            infer_lists: false,
1852            ..Default::default()
1853        };
1854        let result = from_xml(xml, &config);
1855
1856        // Should error for nested duplicates too
1857        assert!(result.is_err());
1858        let err = result.unwrap_err();
1859        assert!(err.contains("Duplicate element"));
1860    }
1861}