hedl_xml/
from_xml.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! XML to HEDL conversion
19
20use hedl_core::convert::parse_reference;
21use hedl_core::{Document, Item, MatrixList, Node, Value};
22use hedl_core::lex::{parse_expression_token, singularize_and_capitalize};
23use hedl_core::lex::Tensor;
24use quick_xml::events::Event;
25use quick_xml::Reader;
26use std::collections::BTreeMap;
27
/// Upper bound on element nesting accepted by the XML parser.
///
/// The recursive parsing functions return an error once their depth counter
/// exceeds this value, so deeply nested input cannot exhaust the call stack.
const MAX_RECURSION_DEPTH: usize = 100;
30
/// Options controlling how an XML document is imported into HEDL.
#[derive(Debug, Clone)]
pub struct FromXmlConfig {
    /// Type name to fall back on for list items that carry no type metadata.
    pub default_type_name: String,
    /// HEDL version as `(major, minor)`; used for the resulting document unless
    /// the XML root element supplies a parseable `version` attribute.
    pub version: (u32, u32),
    /// When `true`, sibling elements sharing the same name are collected into a
    /// list; when `false`, a later sibling replaces an earlier one with the
    /// same key.
    pub infer_lists: bool,
}
41
42impl Default for FromXmlConfig {
43    fn default() -> Self {
44        Self {
45            default_type_name: "Item".to_string(),
46            version: (1, 0),
47            infer_lists: true,
48        }
49    }
50}
51
52impl hedl_core::convert::ImportConfig for FromXmlConfig {
53    fn default_type_name(&self) -> &str {
54        &self.default_type_name
55    }
56
57    fn version(&self) -> (u32, u32) {
58        self.version
59    }
60}
61
62/// Convert XML string to HEDL Document
63pub fn from_xml(xml: &str, config: &FromXmlConfig) -> Result<Document, String> {
64    let mut reader = Reader::from_str(xml);
65    reader.trim_text(true);
66
67    let mut doc = Document::new(config.version);
68
69    // Skip XML declaration and find root element
70    loop {
71        match reader.read_event() {
72            Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
73                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
74
75                // Parse version from root if present
76                for attr in e.attributes().flatten() {
77                    let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
78                    let value = String::from_utf8_lossy(&attr.value).to_string();
79                    if key == "version" {
80                        if let Some((major, minor)) = parse_version(&value) {
81                            doc.version = (major, minor);
82                        }
83                    }
84                }
85
86                // Parse root content
87                doc.root = parse_children(&mut reader, &name, config, &mut doc.structs, 0)?;
88                break;
89            }
90            Ok(Event::Eof) => break,
91            Err(e) => {
92                return Err(format!(
93                    "XML parse error at position {}: {}",
94                    reader.buffer_position(),
95                    e
96                ))
97            }
98            _ => {}
99        }
100    }
101
102    Ok(doc)
103}
104
105fn parse_children(
106    reader: &mut Reader<&[u8]>,
107    parent_name: &str,
108    config: &FromXmlConfig,
109    structs: &mut BTreeMap<String, Vec<String>>,
110    depth: usize,
111) -> Result<BTreeMap<String, Item>, String> {
112    // Security: Prevent stack overflow via deep recursion
113    if depth > MAX_RECURSION_DEPTH {
114        return Err(format!(
115            "XML recursion depth exceeded (max: {})",
116            MAX_RECURSION_DEPTH
117        ));
118    }
119    let mut children = BTreeMap::new();
120    let mut element_counts: BTreeMap<String, Vec<Item>> = BTreeMap::new();
121
122    loop {
123        match reader.read_event() {
124            Ok(Event::Start(e)) => {
125                let raw_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
126                let name = to_hedl_key(&raw_name);
127                let elem_owned = e.to_owned();
128                let item = parse_element(reader, &elem_owned, config, depth + 1)?;
129
130                // Track repeated elements for list inference
131                if config.infer_lists {
132                    element_counts.entry(name.clone()).or_default().push(item);
133                } else {
134                    children.insert(name, item);
135                }
136            }
137            Ok(Event::Empty(e)) => {
138                let raw_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
139                let name = to_hedl_key(&raw_name);
140                let elem_owned = e.to_owned();
141                let item = parse_empty_element(&elem_owned)?;
142
143                if config.infer_lists {
144                    element_counts.entry(name.clone()).or_default().push(item);
145                } else {
146                    children.insert(name, item);
147                }
148            }
149            Ok(Event::End(e)) => {
150                let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
151                if name == parent_name {
152                    break;
153                }
154            }
155            Ok(Event::Eof) => break,
156            Err(e) => return Err(format!("XML parse error: {}", e)),
157            _ => {}
158        }
159    }
160
161    // Process element counts to infer lists
162    if config.infer_lists {
163        for (name, items) in element_counts {
164            if items.len() > 1 {
165                // Multiple elements with same name - convert to list
166                let list = items_to_matrix_list(&name, items, config, structs)?;
167                children.insert(name, Item::List(list));
168            } else if let Some(item) = items.into_iter().next() {
169                children.insert(name, item);
170            }
171        }
172    }
173
174    Ok(children)
175}
176
177fn parse_element(
178    reader: &mut Reader<&[u8]>,
179    elem: &quick_xml::events::BytesStart,
180    config: &FromXmlConfig,
181    depth: usize,
182) -> Result<Item, String> {
183    // Security: Prevent stack overflow via deep recursion
184    if depth > MAX_RECURSION_DEPTH {
185        return Err(format!(
186            "XML recursion depth exceeded (max: {})",
187            MAX_RECURSION_DEPTH
188        ));
189    }
190    let name = String::from_utf8_lossy(elem.name().as_ref()).to_string();
191
192    // Extract attributes (convert keys to valid HEDL format)
193    let mut attributes = BTreeMap::new();
194    let mut is_reference = false;
195    for attr in elem.attributes().flatten() {
196        let raw_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
197        let value = String::from_utf8_lossy(&attr.value).to_string();
198
199        // Check for HEDL type marker (used to distinguish references from strings)
200        if raw_key == "__hedl_type__" {
201            if value == "ref" {
202                is_reference = true;
203            }
204            continue; // Don't include in regular attributes
205        }
206
207        let key = to_hedl_key(&raw_key);
208        attributes.insert(key, value);
209    }
210
211    // Parse content
212    let mut text_content = String::new();
213    let mut child_elements: BTreeMap<String, Vec<Item>> = BTreeMap::new();
214    let mut marked_children: BTreeMap<String, Vec<Item>> = BTreeMap::new(); // Elements with __hedl_child__
215    let mut has_children = false;
216
217    loop {
218        match reader.read_event() {
219            Ok(Event::Start(e)) => {
220                has_children = true;
221                let raw_child_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
222                let child_name = to_hedl_key(&raw_child_name);
223
224                // Check for __hedl_child__ marker attribute
225                let is_marked_child = e.attributes().any(|attr| {
226                    if let Ok(attr) = attr {
227                        let key = String::from_utf8_lossy(attr.key.as_ref());
228                        let val = String::from_utf8_lossy(&attr.value);
229                        key == "__hedl_child__" && val == "true"
230                    } else {
231                        false
232                    }
233                });
234
235                let elem_owned = e.to_owned();
236                let child_item = parse_element(reader, &elem_owned, config, depth + 1)?;
237
238                if is_marked_child {
239                    marked_children
240                        .entry(raw_child_name)
241                        .or_default()
242                        .push(child_item);
243                } else {
244                    child_elements
245                        .entry(child_name)
246                        .or_default()
247                        .push(child_item);
248                }
249            }
250            Ok(Event::Empty(e)) => {
251                has_children = true;
252                let raw_child_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
253                let child_name = to_hedl_key(&raw_child_name);
254
255                // Check for __hedl_child__ marker attribute
256                let is_marked_child = e.attributes().any(|attr| {
257                    if let Ok(attr) = attr {
258                        let key = String::from_utf8_lossy(attr.key.as_ref());
259                        let val = String::from_utf8_lossy(&attr.value);
260                        key == "__hedl_child__" && val == "true"
261                    } else {
262                        false
263                    }
264                });
265
266                let elem_owned = e.to_owned();
267                let child_item = parse_empty_element(&elem_owned)?;
268
269                if is_marked_child {
270                    marked_children
271                        .entry(raw_child_name)
272                        .or_default()
273                        .push(child_item);
274                } else {
275                    child_elements
276                        .entry(child_name)
277                        .or_default()
278                        .push(child_item);
279                }
280            }
281            Ok(Event::Text(e)) => {
282                text_content.push_str(
283                    &e.unescape()
284                        .map_err(|e| format!("Text unescape error: {}", e))?,
285                );
286            }
287            Ok(Event::End(e)) => {
288                let end_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
289                if end_name == name {
290                    break;
291                }
292            }
293            Ok(Event::Eof) => break,
294            Err(e) => return Err(format!("XML parse error: {}", e)),
295            _ => {}
296        }
297    }
298
299    // Determine item type
300    if has_children {
301        // Convert collected child elements, inferring lists for repeated elements
302        let mut result_children = BTreeMap::new();
303        for (child_name, items) in child_elements {
304            if items.len() > 1 && config.infer_lists {
305                // Check if all items are scalars/tensors (->tensor) or objects (->matrix list)
306                if child_name == "item" && items_are_tensor_elements(&items) {
307                    // Convert to tensor
308                    let tensor = items_to_tensor(&items)?;
309                    result_children.insert(child_name, Item::Scalar(Value::Tensor(tensor)));
310                } else {
311                    // Multiple elements with same name - convert to list
312                    let list =
313                        items_to_matrix_list(&child_name, items, config, &mut BTreeMap::new())?;
314                    result_children.insert(child_name, Item::List(list));
315                }
316            } else if let Some(item) = items.into_iter().next() {
317                result_children.insert(child_name, item);
318            }
319        }
320
321        // Convert marked children (elements with __hedl_child__="true") to lists
322        // These represent NEST hierarchical children that should be attached to nodes
323        for (child_type_raw, child_items) in marked_children {
324            if !child_items.is_empty() {
325                // Convert to matrix list (even a single child becomes a list)
326                let list = items_to_matrix_list(
327                    &child_type_raw,
328                    child_items,
329                    config,
330                    &mut BTreeMap::new(),
331                )?;
332                let child_key = to_hedl_key(&child_type_raw);
333                result_children.insert(child_key, Item::List(list));
334            }
335        }
336
337        // Check if we should flatten: if object has single child that's a list,
338        // and the child name is the singular of the parent name, promote the list.
339        // This handles XML patterns like <users><user>...</user><user>...</user></users>
340        // which should become users: @User[...] not users: { user: @User[...] }
341        // BUT: don't flatten if the list has hierarchical children (NEST structures)
342        if result_children.len() == 1 {
343            let (child_key, child_item) = result_children.iter().next().unwrap();
344            if let Item::List(list) = child_item {
345                // Don't flatten if any rows have children (hierarchical nesting)
346                let has_nested_children = list.rows.iter().any(|node| !node.children.is_empty());
347                if !has_nested_children {
348                    // Check if child is singular form of parent
349                    // Compare case-insensitively because XML element names may have different casing
350                    // e.g., post_tags -> PostTag, but child element might be posttag -> Posttag
351                    let parent_singular =
352                        singularize_and_capitalize(&to_hedl_key(&name)).to_lowercase();
353                    let child_type = singularize_and_capitalize(child_key).to_lowercase();
354                    if parent_singular == child_type {
355                        // Flatten: return the list directly
356                        return Ok(result_children.into_values().next().unwrap());
357                    }
358                }
359            }
360        }
361
362        // Object with nested elements
363        Ok(Item::Object(result_children))
364    } else if !text_content.trim().is_empty() {
365        // Scalar with text content
366        let value = if is_reference {
367            // Explicitly marked as reference
368            Value::Reference(parse_reference(text_content.trim())?)
369        } else {
370            parse_value(&text_content)?
371        };
372        Ok(Item::Scalar(value))
373    } else if !attributes.is_empty() {
374        // Empty element with attributes - convert to object
375        let mut obj = BTreeMap::new();
376        for (key, value_str) in attributes {
377            let value = parse_value(&value_str)?;
378            obj.insert(key, Item::Scalar(value));
379        }
380        Ok(Item::Object(obj))
381    } else {
382        // Empty element - null value
383        Ok(Item::Scalar(Value::Null))
384    }
385}
386
387fn parse_empty_element(elem: &quick_xml::events::BytesStart) -> Result<Item, String> {
388    let mut attributes = BTreeMap::new();
389
390    for attr in elem.attributes().flatten() {
391        let raw_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
392        let key = to_hedl_key(&raw_key);
393        let value = String::from_utf8_lossy(&attr.value).to_string();
394        attributes.insert(key, value);
395    }
396
397    if attributes.is_empty() {
398        Ok(Item::Scalar(Value::Null))
399    } else if attributes.len() == 1 && attributes.contains_key("value") {
400        // Special case: <elem value="x"/> -> scalar x
401        let value_str = attributes.get("value").unwrap();
402        let value = parse_value(value_str)?;
403        Ok(Item::Scalar(value))
404    } else {
405        // Multiple attributes - convert to object
406        let mut obj = BTreeMap::new();
407        for (key, value_str) in attributes {
408            let value = parse_value(&value_str)?;
409            obj.insert(key, Item::Scalar(value));
410        }
411        Ok(Item::Object(obj))
412    }
413}
414
415fn parse_value(s: &str) -> Result<Value, String> {
416    let trimmed = s.trim();
417
418    if trimmed.is_empty() {
419        return Ok(Value::Null);
420    }
421
422    // Note: References are NOT auto-detected from @... pattern.
423    // They must be explicitly marked with __hedl_type__="ref" attribute.
424    // This prevents strings like "@not-a-ref" from being incorrectly parsed as references.
425
426    // Check for expression pattern $(...)
427    if trimmed.starts_with("$(") && trimmed.ends_with(')') {
428        let expr =
429            parse_expression_token(trimmed).map_err(|e| format!("Invalid expression: {}", e))?;
430        return Ok(Value::Expression(expr));
431    }
432
433    // Try parsing as boolean
434    if trimmed == "true" {
435        return Ok(Value::Bool(true));
436    }
437    if trimmed == "false" {
438        return Ok(Value::Bool(false));
439    }
440
441    // Try parsing as number
442    if let Ok(i) = trimmed.parse::<i64>() {
443        return Ok(Value::Int(i));
444    }
445    if let Ok(f) = trimmed.parse::<f64>() {
446        return Ok(Value::Float(f));
447    }
448
449    // Default to string
450    Ok(Value::String(trimmed.to_string()))
451}
452
/// Parse a `major.minor` version string.
///
/// Only the first two dot-separated components are used, so `"1.2.3"` yields
/// `(1, 2)`. Returns `None` when there are fewer than two components or when
/// either of the first two is not a valid `u32`.
fn parse_version(s: &str) -> Option<(u32, u32)> {
    let mut components = s.split('.');
    let major = components.next()?;
    let minor = components.next()?;
    Some((major.parse().ok()?, minor.parse().ok()?))
}
463
464fn items_to_matrix_list(
465    name: &str,
466    items: Vec<Item>,
467    _config: &FromXmlConfig,
468    structs: &mut BTreeMap<String, Vec<String>>,
469) -> Result<MatrixList, String> {
470    // Infer type name from element name (singularize and capitalize)
471    let type_name = singularize_and_capitalize(name);
472
473    // Infer schema from first item
474    let schema = infer_schema(&items)?;
475
476    // Register the struct definition
477    structs.insert(type_name.clone(), schema.clone());
478
479    let mut rows = Vec::new();
480    for (idx, item) in items.into_iter().enumerate() {
481        let node = item_to_node(&type_name, &schema, item, idx)?;
482        rows.push(node);
483    }
484
485    Ok(MatrixList {
486        type_name,
487        schema,
488        rows,
489        count_hint: None,
490    })
491}
492
493fn infer_schema(items: &[Item]) -> Result<Vec<String>, String> {
494    if let Some(Item::Object(first_obj)) = items.first() {
495        // Only include scalar fields in the schema, not nested lists or child objects
496        let mut keys: Vec<_> = first_obj
497            .iter()
498            .filter(|(_, item)| matches!(item, Item::Scalar(_)))
499            .map(|(k, _)| k.clone())
500            .collect();
501        keys.sort();
502
503        // Ensure "id" is first if present
504        if let Some(pos) = keys.iter().position(|k| k == "id") {
505            keys.remove(pos);
506            keys.insert(0, "id".to_string());
507        } else {
508            // Add implicit id column
509            keys.insert(0, "id".to_string());
510        }
511
512        Ok(keys)
513    } else {
514        // Default schema
515        Ok(vec!["id".to_string(), "value".to_string()])
516    }
517}
518
519fn item_to_node(
520    type_name: &str,
521    schema: &[String],
522    item: Item,
523    idx: usize,
524) -> Result<Node, String> {
525    match item {
526        Item::Object(obj) => {
527            // Extract ID from object or generate one
528            let id = obj
529                .get(&schema[0])
530                .and_then(|i| i.as_scalar())
531                .and_then(|v| v.as_str())
532                .map(|s| s.to_string())
533                .unwrap_or_else(|| format!("{}", idx));
534
535            // Extract ALL field values (including ID) per SPEC
536            let mut fields = Vec::new();
537            for col in schema {
538                let value = obj
539                    .get(col)
540                    .and_then(|i| i.as_scalar())
541                    .cloned()
542                    .unwrap_or(Value::Null);
543                fields.push(value);
544            }
545
546            // Extract nested children (Item::List entries become child nodes)
547            let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
548            for child_item in obj.values() {
549                if let Item::List(child_list) = child_item {
550                    // Convert child list rows to nodes
551                    children.insert(child_list.type_name.clone(), child_list.rows.clone());
552                }
553            }
554
555            Ok(Node {
556                type_name: type_name.to_string(),
557                id,
558                fields,
559                children,
560                child_count: None,
561            })
562        }
563        Item::Scalar(value) => {
564            // Single scalar - create node with ID value and scalar value
565            let id = format!("{}", idx);
566            Ok(Node {
567                type_name: type_name.to_string(),
568                id: id.clone(),
569                fields: vec![Value::String(id), value],
570                children: BTreeMap::new(),
571                child_count: None,
572            })
573        }
574        Item::List(_) => Err("Cannot convert nested list to node".to_string()),
575    }
576}
577
/// Normalize an XML name into a lowercase snake_case HEDL key.
///
/// ASCII uppercase letters are lowercased; an underscore is inserted before an
/// uppercase letter unless it starts the input, follows another uppercase
/// letter, or follows an underscore. Runs of underscores collapse to one and
/// leading/trailing underscores are dropped. All other characters pass through
/// unchanged.
///
/// "Category" -> "category", "UserPost" -> "user_post", "XMLData" -> "xmldata"
fn to_hedl_key(s: &str) -> String {
    let mut key = String::with_capacity(s.len());
    let mut after_upper = false;

    for ch in s.chars() {
        let is_upper = ch.is_ascii_uppercase();
        if is_upper {
            // Word boundary: lower-to-upper transition not already separated.
            if !key.is_empty() && !after_upper && !key.ends_with('_') {
                key.push('_');
            }
            key.push(ch.to_ascii_lowercase());
        } else if ch != '_' || !key.ends_with('_') {
            // Skipping an underscore that follows one collapses runs as we go.
            key.push(ch);
        }
        after_upper = is_upper;
    }

    key.trim_matches('_').to_string()
}
606
607/// Check if all items are suitable for tensor representation.
608/// Items must be numeric scalars or objects containing only a tensor at the "item" key.
609fn items_are_tensor_elements(items: &[Item]) -> bool {
610    items.iter().all(|item| {
611        match item {
612            // Direct numeric scalars
613            Item::Scalar(Value::Int(_)) => true,
614            Item::Scalar(Value::Float(_)) => true,
615            // Already-parsed tensors
616            Item::Scalar(Value::Tensor(_)) => true,
617            // Objects with single "item" key containing a tensor (nested arrays)
618            Item::Object(obj) if obj.len() == 1 => {
619                matches!(obj.get("item"), Some(Item::Scalar(Value::Tensor(_))))
620            }
621            _ => false,
622        }
623    })
624}
625
626/// Convert items to a tensor.
627fn items_to_tensor(items: &[Item]) -> Result<Tensor, String> {
628    let mut tensor_items = Vec::new();
629
630    for item in items {
631        let tensor = match item {
632            Item::Scalar(Value::Int(n)) => Tensor::Scalar(*n as f64),
633            Item::Scalar(Value::Float(f)) => Tensor::Scalar(*f),
634            Item::Scalar(Value::Tensor(t)) => t.clone(),
635            Item::Object(obj) if obj.len() == 1 => {
636                // Nested tensor element (object with only "item" key containing tensor)
637                if let Some(Item::Scalar(Value::Tensor(t))) = obj.get("item") {
638                    t.clone()
639                } else {
640                    return Err("Cannot convert non-numeric item to tensor".to_string());
641                }
642            }
643            _ => return Err("Cannot convert non-numeric item to tensor".to_string()),
644        };
645        tensor_items.push(tensor);
646    }
647
648    Ok(Tensor::Array(tensor_items))
649}
650
651#[cfg(test)]
652mod tests {
653    use super::*;
654
655    // ==================== FromXmlConfig tests ====================
656
657    #[test]
658    fn test_from_xml_config_default() {
659        let config = FromXmlConfig::default();
660        assert_eq!(config.default_type_name, "Item");
661        assert_eq!(config.version, (1, 0));
662        assert!(config.infer_lists);
663    }
664
665    #[test]
666    fn test_from_xml_config_debug() {
667        let config = FromXmlConfig::default();
668        let debug = format!("{:?}", config);
669        assert!(debug.contains("FromXmlConfig"));
670        assert!(debug.contains("default_type_name"));
671        assert!(debug.contains("version"));
672        assert!(debug.contains("infer_lists"));
673    }
674
675    #[test]
676    fn test_from_xml_config_clone() {
677        let config = FromXmlConfig {
678            default_type_name: "Custom".to_string(),
679            version: (2, 1),
680            infer_lists: false,
681        };
682        let cloned = config.clone();
683        assert_eq!(cloned.default_type_name, "Custom");
684        assert_eq!(cloned.version, (2, 1));
685        assert!(!cloned.infer_lists);
686    }
687
688    #[test]
689    fn test_from_xml_config_custom() {
690        let config = FromXmlConfig {
691            default_type_name: "MyType".to_string(),
692            version: (3, 5),
693            infer_lists: false,
694        };
695        assert_eq!(config.default_type_name, "MyType");
696        assert_eq!(config.version, (3, 5));
697        assert!(!config.infer_lists);
698    }
699
700    // ==================== parse_value tests ====================
701
702    #[test]
703    fn test_parse_value_empty() {
704        assert_eq!(parse_value("").unwrap(), Value::Null);
705        assert_eq!(parse_value("   ").unwrap(), Value::Null);
706    }
707
708    #[test]
709    fn test_parse_value_bool_true() {
710        assert_eq!(parse_value("true").unwrap(), Value::Bool(true));
711    }
712
713    #[test]
714    fn test_parse_value_bool_false() {
715        assert_eq!(parse_value("false").unwrap(), Value::Bool(false));
716    }
717
718    #[test]
719    fn test_parse_value_int_positive() {
720        assert_eq!(parse_value("42").unwrap(), Value::Int(42));
721    }
722
723    #[test]
724    fn test_parse_value_int_negative() {
725        assert_eq!(parse_value("-100").unwrap(), Value::Int(-100));
726    }
727
728    #[test]
729    fn test_parse_value_int_zero() {
730        assert_eq!(parse_value("0").unwrap(), Value::Int(0));
731    }
732
733    #[test]
734    fn test_parse_value_float_simple() {
735        if let Value::Float(f) = parse_value("3.5").unwrap() {
736            assert!((f - 3.5).abs() < 0.001);
737        } else {
738            panic!("Expected float");
739        }
740    }
741
742    #[test]
743    fn test_parse_value_float_negative() {
744        if let Value::Float(f) = parse_value("-2.5").unwrap() {
745            assert!((f + 2.5).abs() < 0.001);
746        } else {
747            panic!("Expected float");
748        }
749    }
750
751    #[test]
752    fn test_parse_value_string() {
753        assert_eq!(
754            parse_value("hello").unwrap(),
755            Value::String("hello".to_string())
756        );
757    }
758
759    #[test]
760    fn test_parse_value_string_with_spaces() {
761        assert_eq!(
762            parse_value("  hello world  ").unwrap(),
763            Value::String("hello world".to_string())
764        );
765    }
766
767    #[test]
768    fn test_parse_value_expression_identifier() {
769        if let Value::Expression(e) = parse_value("$(foo)").unwrap() {
770            assert_eq!(e.to_string(), "foo");
771        } else {
772            panic!("Expected expression");
773        }
774    }
775
776    #[test]
777    fn test_parse_value_expression_call() {
778        if let Value::Expression(e) = parse_value("$(add(x, 1))").unwrap() {
779            assert_eq!(e.to_string(), "add(x, 1)");
780        } else {
781            panic!("Expected expression");
782        }
783    }
784
785    #[test]
786    fn test_parse_value_at_string_not_reference() {
787        // Strings starting with @ are just strings, not references
788        if let Value::String(s) = parse_value("@not-a-ref").unwrap() {
789            assert_eq!(s, "@not-a-ref");
790        } else {
791            panic!("Expected string");
792        }
793    }
794
795    // ==================== parse_reference tests ====================
796
797    #[test]
798    fn test_parse_reference_local() {
799        let ref_val = parse_reference("@user123").unwrap();
800        assert_eq!(ref_val.type_name, None);
801        assert_eq!(ref_val.id, "user123");
802    }
803
804    #[test]
805    fn test_parse_reference_qualified() {
806        let ref_val = parse_reference("@User:123").unwrap();
807        assert_eq!(ref_val.type_name, Some("User".to_string()));
808        assert_eq!(ref_val.id, "123");
809    }
810
811    #[test]
812    fn test_parse_reference_with_special_chars() {
813        let ref_val = parse_reference("@my-item_123").unwrap();
814        assert_eq!(ref_val.type_name, None);
815        assert_eq!(ref_val.id, "my-item_123");
816    }
817
818    #[test]
819    fn test_parse_reference_invalid_no_at() {
820        let result = parse_reference("user123");
821        assert!(result.is_err());
822        assert!(result.unwrap_err().contains("Invalid reference format"));
823    }
824
825    // ==================== parse_version tests ====================
826
827    #[test]
828    fn test_parse_version_valid() {
829        assert_eq!(parse_version("1.0"), Some((1, 0)));
830        assert_eq!(parse_version("2.5"), Some((2, 5)));
831        assert_eq!(parse_version("10.20"), Some((10, 20)));
832    }
833
834    #[test]
835    fn test_parse_version_with_patch() {
836        // Only major.minor are taken
837        assert_eq!(parse_version("1.2.3"), Some((1, 2)));
838    }
839
840    #[test]
841    fn test_parse_version_invalid() {
842        assert_eq!(parse_version("invalid"), None);
843        assert_eq!(parse_version("1"), None);
844        assert_eq!(parse_version(""), None);
845        assert_eq!(parse_version("a.b"), None);
846    }
847
848    // ==================== to_hedl_key tests ====================
849
850    #[test]
851    fn test_to_hedl_key_pascal_case() {
852        assert_eq!(to_hedl_key("Category"), "category");
853        assert_eq!(to_hedl_key("UserPost"), "user_post");
854        assert_eq!(to_hedl_key("UserProfileSettings"), "user_profile_settings");
855    }
856
857    #[test]
858    fn test_to_hedl_key_acronyms() {
859        assert_eq!(to_hedl_key("XMLData"), "xmldata");
860        assert_eq!(to_hedl_key("HTTPResponse"), "httpresponse");
861    }
862
863    #[test]
864    fn test_to_hedl_key_lowercase() {
865        assert_eq!(to_hedl_key("users"), "users");
866        assert_eq!(to_hedl_key("category"), "category");
867    }
868
869    #[test]
870    fn test_to_hedl_key_mixed() {
871        assert_eq!(to_hedl_key("someXMLData"), "some_xmldata");
872        assert_eq!(to_hedl_key("getHTTPResponse"), "get_httpresponse");
873    }
874
875    #[test]
876    fn test_to_hedl_key_with_underscores() {
877        assert_eq!(to_hedl_key("user_name"), "user_name");
878        assert_eq!(to_hedl_key("_private"), "private");
879    }
880
881    // ==================== items_are_tensor_elements tests ====================
882
883    #[test]
884    fn test_items_are_tensor_elements_int_scalars() {
885        let items = vec![
886            Item::Scalar(Value::Int(1)),
887            Item::Scalar(Value::Int(2)),
888            Item::Scalar(Value::Int(3)),
889        ];
890        assert!(items_are_tensor_elements(&items));
891    }
892
893    #[test]
894    fn test_items_are_tensor_elements_float_scalars() {
895        let items = vec![
896            Item::Scalar(Value::Float(1.0)),
897            Item::Scalar(Value::Float(2.0)),
898        ];
899        assert!(items_are_tensor_elements(&items));
900    }
901
902    #[test]
903    fn test_items_are_tensor_elements_tensors() {
904        let items = vec![
905            Item::Scalar(Value::Tensor(Tensor::Scalar(1.0))),
906            Item::Scalar(Value::Tensor(Tensor::Scalar(2.0))),
907        ];
908        assert!(items_are_tensor_elements(&items));
909    }
910
911    #[test]
912    fn test_items_are_tensor_elements_mixed_numeric() {
913        let items = vec![Item::Scalar(Value::Int(1)), Item::Scalar(Value::Float(2.0))];
914        assert!(items_are_tensor_elements(&items));
915    }
916
917    #[test]
918    fn test_items_are_tensor_elements_with_strings() {
919        let items = vec![
920            Item::Scalar(Value::Int(1)),
921            Item::Scalar(Value::String("hello".to_string())),
922        ];
923        assert!(!items_are_tensor_elements(&items));
924    }
925
926    #[test]
927    fn test_items_are_tensor_elements_empty() {
928        let items: Vec<Item> = vec![];
929        assert!(items_are_tensor_elements(&items));
930    }
931
932    // ==================== items_to_tensor tests ====================
933
934    #[test]
935    fn test_items_to_tensor_int_scalars() {
936        let items = vec![
937            Item::Scalar(Value::Int(1)),
938            Item::Scalar(Value::Int(2)),
939            Item::Scalar(Value::Int(3)),
940        ];
941        let tensor = items_to_tensor(&items).unwrap();
942        if let Tensor::Array(arr) = tensor {
943            assert_eq!(arr.len(), 3);
944            assert_eq!(arr[0], Tensor::Scalar(1.0));
945        } else {
946            panic!("Expected array");
947        }
948    }
949
950    #[test]
951    fn test_items_to_tensor_float_scalars() {
952        let items = vec![
953            Item::Scalar(Value::Float(1.5)),
954            Item::Scalar(Value::Float(2.5)),
955        ];
956        let tensor = items_to_tensor(&items).unwrap();
957        if let Tensor::Array(arr) = tensor {
958            assert_eq!(arr.len(), 2);
959            assert_eq!(arr[0], Tensor::Scalar(1.5));
960        } else {
961            panic!("Expected array");
962        }
963    }
964
965    #[test]
966    fn test_items_to_tensor_invalid() {
967        let items = vec![Item::Scalar(Value::String("hello".to_string()))];
968        let result = items_to_tensor(&items);
969        assert!(result.is_err());
970    }
971
972    // ==================== from_xml basic tests ====================
973
974    #[test]
975    fn test_empty_document() {
976        let xml = r#"<?xml version="1.0" encoding="UTF-8"?><hedl></hedl>"#;
977        let config = FromXmlConfig::default();
978        let doc = from_xml(xml, &config).unwrap();
979        assert_eq!(doc.root.len(), 0);
980    }
981
982    #[test]
983    fn test_empty_document_self_closing() {
984        let xml = r#"<?xml version="1.0" encoding="UTF-8"?><hedl/>"#;
985        let config = FromXmlConfig::default();
986        let doc = from_xml(xml, &config).unwrap();
987        assert_eq!(doc.root.len(), 0);
988    }
989
990    #[test]
991    fn test_scalar_bool_true() {
992        let xml = r#"<?xml version="1.0"?><hedl><val>true</val></hedl>"#;
993        let config = FromXmlConfig::default();
994        let doc = from_xml(xml, &config).unwrap();
995        assert_eq!(
996            doc.root.get("val").and_then(|i| i.as_scalar()),
997            Some(&Value::Bool(true))
998        );
999    }
1000
1001    #[test]
1002    fn test_scalar_bool_false() {
1003        let xml = r#"<?xml version="1.0"?><hedl><val>false</val></hedl>"#;
1004        let config = FromXmlConfig::default();
1005        let doc = from_xml(xml, &config).unwrap();
1006        assert_eq!(
1007            doc.root.get("val").and_then(|i| i.as_scalar()),
1008            Some(&Value::Bool(false))
1009        );
1010    }
1011
1012    #[test]
1013    fn test_scalar_int() {
1014        let xml = r#"<?xml version="1.0"?><hedl><val>42</val></hedl>"#;
1015        let config = FromXmlConfig::default();
1016        let doc = from_xml(xml, &config).unwrap();
1017        assert_eq!(
1018            doc.root.get("val").and_then(|i| i.as_scalar()),
1019            Some(&Value::Int(42))
1020        );
1021    }
1022
1023    #[test]
1024    fn test_scalar_float() {
1025        let xml = r#"<?xml version="1.0"?><hedl><val>3.5</val></hedl>"#;
1026        let config = FromXmlConfig::default();
1027        let doc = from_xml(xml, &config).unwrap();
1028        if let Some(Item::Scalar(Value::Float(f))) = doc.root.get("val") {
1029            assert!((f - 3.5).abs() < 0.001);
1030        } else {
1031            panic!("Expected float");
1032        }
1033    }
1034
1035    #[test]
1036    fn test_scalar_string() {
1037        let xml = r#"<?xml version="1.0"?><hedl><val>hello</val></hedl>"#;
1038        let config = FromXmlConfig::default();
1039        let doc = from_xml(xml, &config).unwrap();
1040        assert_eq!(
1041            doc.root.get("val").and_then(|i| i.as_scalar()),
1042            Some(&Value::String("hello".to_string()))
1043        );
1044    }
1045
1046    #[test]
1047    fn test_scalar_null_empty_element() {
1048        let xml = r#"<?xml version="1.0"?><hedl><val></val></hedl>"#;
1049        let config = FromXmlConfig::default();
1050        let doc = from_xml(xml, &config).unwrap();
1051        assert_eq!(
1052            doc.root.get("val").and_then(|i| i.as_scalar()),
1053            Some(&Value::Null)
1054        );
1055    }
1056
1057    #[test]
1058    fn test_scalar_expression() {
1059        let xml = r#"<?xml version="1.0"?><hedl><val>$(foo)</val></hedl>"#;
1060        let config = FromXmlConfig::default();
1061        let doc = from_xml(xml, &config).unwrap();
1062        if let Some(Item::Scalar(Value::Expression(e))) = doc.root.get("val") {
1063            assert_eq!(e.to_string(), "foo");
1064        } else {
1065            panic!("Expected expression");
1066        }
1067    }
1068
1069    // ==================== Nested object tests ====================
1070
1071    #[test]
1072    fn test_nested_object() {
1073        let xml = r#"<?xml version="1.0"?>
1074        <hedl>
1075            <config>
1076                <name>test</name>
1077                <value>100</value>
1078            </config>
1079        </hedl>"#;
1080
1081        let config = FromXmlConfig::default();
1082        let doc = from_xml(xml, &config).unwrap();
1083
1084        let config_item = doc.root.get("config").unwrap();
1085        assert!(config_item.as_object().is_some());
1086
1087        if let Item::Object(obj) = config_item {
1088            assert!(obj.contains_key("name"));
1089            assert!(obj.contains_key("value"));
1090        }
1091    }
1092
1093    #[test]
1094    fn test_deeply_nested_object() {
1095        let xml = r#"<?xml version="1.0"?>
1096        <hedl>
1097            <outer>
1098                <inner>
1099                    <deep>42</deep>
1100                </inner>
1101            </outer>
1102        </hedl>"#;
1103
1104        let config = FromXmlConfig::default();
1105        let doc = from_xml(xml, &config).unwrap();
1106
1107        if let Some(Item::Object(outer)) = doc.root.get("outer") {
1108            if let Some(Item::Object(inner)) = outer.get("inner") {
1109                if let Some(Item::Scalar(Value::Int(n))) = inner.get("deep") {
1110                    assert_eq!(*n, 42);
1111                } else {
1112                    panic!("Expected int");
1113                }
1114            } else {
1115                panic!("Expected inner object");
1116            }
1117        } else {
1118            panic!("Expected outer object");
1119        }
1120    }
1121
1122    // ==================== List inference tests ====================
1123
1124    #[test]
1125    fn test_infer_list_repeated_elements() {
1126        let xml = r#"<?xml version="1.0"?>
1127        <hedl>
1128            <user id="1"><name>Alice</name></user>
1129            <user id="2"><name>Bob</name></user>
1130        </hedl>"#;
1131
1132        let config = FromXmlConfig {
1133            infer_lists: true,
1134            ..Default::default()
1135        };
1136        let doc = from_xml(xml, &config).unwrap();
1137
1138        if let Some(Item::List(list)) = doc.root.get("user") {
1139            assert_eq!(list.rows.len(), 2);
1140        } else {
1141            panic!("Expected list");
1142        }
1143    }
1144
1145    #[test]
1146    fn test_no_infer_list_single_element() {
1147        let xml = r#"<?xml version="1.0"?>
1148        <hedl>
1149            <user id="1"><name>Alice</name></user>
1150        </hedl>"#;
1151
1152        let config = FromXmlConfig {
1153            infer_lists: true,
1154            ..Default::default()
1155        };
1156        let doc = from_xml(xml, &config).unwrap();
1157
1158        // Single element should remain as object
1159        assert!(doc.root.get("user").and_then(|i| i.as_object()).is_some());
1160    }
1161
1162    #[test]
1163    fn test_infer_list_disabled() {
1164        let xml = r#"<?xml version="1.0"?>
1165        <hedl>
1166            <user id="1"><name>Alice</name></user>
1167            <user id="2"><name>Bob</name></user>
1168        </hedl>"#;
1169
1170        let config = FromXmlConfig {
1171            infer_lists: false,
1172            ..Default::default()
1173        };
1174        let doc = from_xml(xml, &config).unwrap();
1175
1176        // With infer_lists disabled, second element overwrites first
1177        assert!(doc.root.get("user").and_then(|i| i.as_object()).is_some());
1178    }
1179
1180    // ==================== Attribute parsing tests ====================
1181
1182    #[test]
1183    fn test_attributes_to_object() {
1184        let xml = r#"<?xml version="1.0"?>
1185        <hedl>
1186            <item id="123" name="test" active="true"/>
1187        </hedl>"#;
1188
1189        let config = FromXmlConfig::default();
1190        let doc = from_xml(xml, &config).unwrap();
1191
1192        if let Some(Item::Object(obj)) = doc.root.get("item") {
1193            assert_eq!(
1194                obj.get("id").and_then(|i| i.as_scalar()),
1195                Some(&Value::Int(123))
1196            );
1197            assert_eq!(
1198                obj.get("name").and_then(|i| i.as_scalar()),
1199                Some(&Value::String("test".to_string()))
1200            );
1201            assert_eq!(
1202                obj.get("active").and_then(|i| i.as_scalar()),
1203                Some(&Value::Bool(true))
1204            );
1205        } else {
1206            panic!("Expected object");
1207        }
1208    }
1209
1210    #[test]
1211    fn test_single_value_attribute() {
1212        let xml = r#"<?xml version="1.0"?>
1213        <hedl>
1214            <item value="42"/>
1215        </hedl>"#;
1216
1217        let config = FromXmlConfig::default();
1218        let doc = from_xml(xml, &config).unwrap();
1219
1220        assert_eq!(
1221            doc.root.get("item").and_then(|i| i.as_scalar()),
1222            Some(&Value::Int(42))
1223        );
1224    }
1225
1226    // ==================== Version parsing from root ====================
1227
1228    #[test]
1229    fn test_version_from_root_attribute() {
1230        let xml = r#"<?xml version="1.0"?><hedl version="2.5"></hedl>"#;
1231        let config = FromXmlConfig::default();
1232        let doc = from_xml(xml, &config).unwrap();
1233        assert_eq!(doc.version, (2, 5));
1234    }
1235
1236    #[test]
1237    fn test_version_default() {
1238        let xml = r#"<?xml version="1.0"?><hedl></hedl>"#;
1239        let config = FromXmlConfig {
1240            version: (3, 1),
1241            ..Default::default()
1242        };
1243        let doc = from_xml(xml, &config).unwrap();
1244        assert_eq!(doc.version, (3, 1));
1245    }
1246
1247    // ==================== Reference with marker attribute ====================
1248
1249    #[test]
1250    fn test_reference_with_marker() {
1251        let xml = r#"<?xml version="1.0"?>
1252        <hedl>
1253            <ref __hedl_type__="ref">@user123</ref>
1254        </hedl>"#;
1255
1256        let config = FromXmlConfig::default();
1257        let doc = from_xml(xml, &config).unwrap();
1258
1259        if let Some(Item::Scalar(Value::Reference(r))) = doc.root.get("ref") {
1260            assert_eq!(r.id, "user123");
1261        } else {
1262            panic!("Expected reference");
1263        }
1264    }
1265
1266    #[test]
1267    fn test_qualified_reference_with_marker() {
1268        let xml = r#"<?xml version="1.0"?>
1269        <hedl>
1270            <ref __hedl_type__="ref">@User:456</ref>
1271        </hedl>"#;
1272
1273        let config = FromXmlConfig::default();
1274        let doc = from_xml(xml, &config).unwrap();
1275
1276        if let Some(Item::Scalar(Value::Reference(r))) = doc.root.get("ref") {
1277            assert_eq!(r.type_name, Some("User".to_string()));
1278            assert_eq!(r.id, "456");
1279        } else {
1280            panic!("Expected reference");
1281        }
1282    }
1283
1284    // ==================== Error cases ====================
1285
1286    #[test]
1287    fn test_empty_input() {
1288        // Empty input should produce an empty document
1289        let xml = "";
1290        let config = FromXmlConfig::default();
1291        let doc = from_xml(xml, &config).unwrap();
1292        assert!(doc.root.is_empty());
1293    }
1294
1295    #[test]
1296    fn test_only_declaration() {
1297        // Only XML declaration should produce an empty document
1298        let xml = r#"<?xml version="1.0"?>"#;
1299        let config = FromXmlConfig::default();
1300        let doc = from_xml(xml, &config).unwrap();
1301        assert!(doc.root.is_empty());
1302    }
1303
1304    // ==================== Edge cases ====================
1305
1306    #[test]
1307    fn test_unicode_content() {
1308        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1309        <hedl>
1310            <name>héllo 世界</name>
1311        </hedl>"#;
1312
1313        let config = FromXmlConfig::default();
1314        let doc = from_xml(xml, &config).unwrap();
1315
1316        assert_eq!(
1317            doc.root.get("name").and_then(|i| i.as_scalar()),
1318            Some(&Value::String("héllo 世界".to_string()))
1319        );
1320    }
1321
1322    #[test]
1323    fn test_whitespace_handling() {
1324        let xml = r#"<?xml version="1.0"?>
1325        <hedl>
1326            <val>   hello world   </val>
1327        </hedl>"#;
1328
1329        let config = FromXmlConfig::default();
1330        let doc = from_xml(xml, &config).unwrap();
1331
1332        // Whitespace should be trimmed
1333        assert_eq!(
1334            doc.root.get("val").and_then(|i| i.as_scalar()),
1335            Some(&Value::String("hello world".to_string()))
1336        );
1337    }
1338
1339    #[test]
1340    fn test_cdata_content() {
1341        let xml = r#"<?xml version="1.0"?>
1342        <hedl>
1343            <text><![CDATA[<not>xml</not>]]></text>
1344        </hedl>"#;
1345
1346        let config = FromXmlConfig::default();
1347        let doc = from_xml(xml, &config).unwrap();
1348
1349        // CDATA content should be preserved
1350        assert!(doc.root.contains_key("text"));
1351    }
1352
1353    #[test]
1354    fn test_key_conversion_from_pascal_case() {
1355        let xml = r#"<?xml version="1.0"?>
1356        <hedl>
1357            <UserName>test</UserName>
1358        </hedl>"#;
1359
1360        let config = FromXmlConfig::default();
1361        let doc = from_xml(xml, &config).unwrap();
1362
1363        // UserName should be converted to user_name
1364        assert!(doc.root.contains_key("user_name"));
1365    }
1366}