Skip to main content

hedl_json/from_json/
converter.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Core JSON to HEDL conversion functions
19
20use super::array_conversion::{
21    is_object_array, is_tensor_array, json_array_to_list, json_array_to_matrix_list,
22    json_array_to_tensor, json_array_to_tensor_owned,
23};
24use super::auto_nesting::{auto_nest_by_fk, infer_nests_from_children};
25use super::config::{json_number_to_value, FromJsonConfig, JsonConversionError, SchemaCache};
26use super::surrogate::preprocess_json_for_surrogates;
27use super::uniform_schema::try_convert_uniform_object_to_matrixlist;
28use super::SurrogatePolicy;
29use hedl_core::convert::parse_reference;
30use hedl_core::lex::parse_expression_token;
31use hedl_core::{Document, Item, Value};
32use serde_json::{Map, Value as JsonValue};
33use std::collections::BTreeMap;
34
35/// Convert JSON string to HEDL Document
36///
37/// # Arguments
38///
39/// * `json` - JSON string to parse
40/// * `config` - Configuration for import behavior and security limits
41///
42/// # Returns
43///
44/// * `Ok(Document)` - Successfully parsed HEDL document
45/// * `Err(JsonConversionError)` - Parsing or validation error
46pub fn from_json(json: &str, config: &FromJsonConfig) -> Result<Document, JsonConversionError> {
47    // Preprocess for surrogate handling if policy is not Reject
48    let processed = preprocess_json_for_surrogates(json, config.surrogate_policy)?;
49    let json_to_parse = if config.surrogate_policy == SurrogatePolicy::Reject {
50        json
51    } else {
52        &processed
53    };
54
55    #[cfg(feature = "lenient")]
56    let value: JsonValue = if config.lenient {
57        serde_jsonrc::from_str(json_to_parse)
58            .map_err(|e| JsonConversionError::ParseError(e.to_string()))?
59    } else {
60        serde_json::from_str(json_to_parse)?
61    };
62
63    #[cfg(not(feature = "lenient"))]
64    let value: JsonValue = serde_json::from_str(json_to_parse)?;
65
66    from_json_value(&value, config)
67}
68
69/// Convert `serde_json::Value` to HEDL Document
70///
71/// # Arguments
72///
73/// * `value` - Parsed JSON value (must be an object)
74/// * `config` - Configuration for import behavior and security limits
75///
76/// # Returns
77///
78/// * `Ok(Document)` - Successfully converted HEDL document
79/// * `Err(JsonConversionError)` - Validation error
80///
81/// # Examples
82///
83/// ```text
84/// use hedl_json::{from_json_value, FromJsonConfig};
85/// use serde_json::json;
86///
87/// let value = json!({"users": [{"id": "alice"}]});
88/// let config = FromJsonConfig::default();
89/// let doc = from_json_value(&value, &config).unwrap();
90/// ```
91pub fn from_json_value(
92    value: &JsonValue,
93    config: &FromJsonConfig,
94) -> Result<Document, JsonConversionError> {
95    let mut structs = BTreeMap::new();
96    let mut schema_cache = SchemaCache::new();
97    let root = match value {
98        JsonValue::Object(map) => {
99            json_object_to_root(map, config, &mut structs, &mut schema_cache, 0)?
100        }
101        JsonValue::Array(arr) => {
102            // Root-level arrays are valid JSON - convert to a single "items" list
103            if arr.is_empty() {
104                BTreeMap::new()
105            } else if is_object_array(arr) {
106                let list = json_array_to_matrix_list(
107                    arr,
108                    "items",
109                    config,
110                    &mut structs,
111                    &mut schema_cache,
112                    0,
113                )?;
114                let mut root = BTreeMap::new();
115                root.insert("items".to_string(), Item::List(list));
116                root
117            } else if is_tensor_array(arr) {
118                let tensor = json_array_to_tensor(arr, config, 0)?;
119                let mut root = BTreeMap::new();
120                root.insert(
121                    "items".to_string(),
122                    Item::Scalar(Value::Tensor(Box::new(tensor))),
123                );
124                root
125            } else {
126                // Mixed/primitive array
127                let list_value = json_array_to_list(arr, config)?;
128                let mut root = BTreeMap::new();
129                root.insert("items".to_string(), Item::Scalar(list_value));
130                root
131            }
132        }
133        _ => return Err(JsonConversionError::InvalidRoot(format!("{value:?}"))),
134    };
135
136    let doc = Document {
137        version: config.version,
138        schema_versions: BTreeMap::new(),
139        aliases: BTreeMap::new(),
140        structs,
141        nests: BTreeMap::new(),
142        root,
143    };
144
145    // Auto-detect FK relationships and build nested hierarchies (for flat JSON)
146    let mut doc = auto_nest_by_fk(doc)?;
147
148    // Infer NEST declarations from existing children (for already-nested JSON)
149    infer_nests_from_children(&mut doc);
150
151    Ok(doc)
152}
153
154/// Convert owned `serde_json::Value` to HEDL Document with zero-copy optimization
155///
156/// This version accepts an owned `JsonValue` which allows for zero-copy string handling
157/// by moving strings instead of cloning them.
158///
159/// # Arguments
160///
161/// * `value` - Owned parsed JSON value (must be an object)
162/// * `config` - Configuration for import behavior and security limits
163///
164/// # Returns
165///
166/// * `Ok(Document)` - Successfully converted HEDL document
167/// * `Err(JsonConversionError)` - Validation error
168///
169/// # Performance
170///
171/// This function is optimized for reduced memory allocations by moving strings
172/// from the JSON value instead of cloning them. For large documents with many
173/// strings, this can reduce allocations by 30-50%.
174///
175/// # Examples
176///
177/// ```text
178/// use hedl_json::{from_json_value_owned, FromJsonConfig};
179/// use serde_json::json;
180///
181/// let value = json!({"users": [{"id": "alice"}]});
182/// let config = FromJsonConfig::default();
183/// let doc = from_json_value_owned(value, &config).unwrap();
184/// ```
185pub fn from_json_value_owned(
186    value: JsonValue,
187    config: &FromJsonConfig,
188) -> Result<Document, JsonConversionError> {
189    let mut structs = BTreeMap::new();
190    let mut schema_cache = SchemaCache::new();
191    let root = match value {
192        JsonValue::Object(map) => {
193            json_object_to_root_owned(map, config, &mut structs, &mut schema_cache, 0)?
194        }
195        JsonValue::Array(arr) => {
196            // Root-level arrays are valid JSON - convert to a single "items" list
197            if arr.is_empty() {
198                BTreeMap::new()
199            } else if is_object_array(&arr) {
200                let list = json_array_to_matrix_list(
201                    &arr,
202                    "items",
203                    config,
204                    &mut structs,
205                    &mut schema_cache,
206                    0,
207                )?;
208                let mut root = BTreeMap::new();
209                root.insert("items".to_string(), Item::List(list));
210                root
211            } else if is_tensor_array(&arr) {
212                let tensor = json_array_to_tensor_owned(arr, config, 0)?;
213                let mut root = BTreeMap::new();
214                root.insert(
215                    "items".to_string(),
216                    Item::Scalar(Value::Tensor(Box::new(tensor))),
217                );
218                root
219            } else {
220                // Mixed/primitive array
221                let list_value = json_array_to_list(&arr, config)?;
222                let mut root = BTreeMap::new();
223                root.insert("items".to_string(), Item::Scalar(list_value));
224                root
225            }
226        }
227        _ => {
228            return Err(JsonConversionError::InvalidRoot(
229                "Root must be an object or array".to_string(),
230            ))
231        }
232    };
233
234    let doc = Document {
235        version: config.version,
236        schema_versions: BTreeMap::new(),
237        aliases: BTreeMap::new(),
238        structs,
239        nests: BTreeMap::new(),
240        root,
241    };
242
243    // Auto-detect FK relationships and build nested hierarchies (for flat JSON)
244    let mut doc = auto_nest_by_fk(doc)?;
245
246    // Infer NEST declarations from existing children (for already-nested JSON)
247    infer_nests_from_children(&mut doc);
248
249    Ok(doc)
250}
251
252/// Process JSON object into HEDL item map, skipping metadata keys.
253/// This is the shared implementation used by both root and nested objects.
254///
255/// # Performance Optimization
256///
257/// Pre-allocates `BTreeMap` capacity to reduce allocation churn during object construction.
258/// Based on profiling, this reduces allocations by approximately 15-20% for object-heavy JSON.
259fn process_json_object_inner(
260    map: &Map<String, JsonValue>,
261    config: &FromJsonConfig,
262    structs: &mut BTreeMap<String, Vec<String>>,
263    schema_cache: &mut SchemaCache,
264    depth: usize,
265) -> Result<BTreeMap<String, Item>, JsonConversionError> {
266    // Check object size limit
267    if let Some(max_size) = config.max_object_size {
268        if map.len() > max_size {
269            return Err(JsonConversionError::MaxObjectSizeExceeded(
270                max_size,
271                map.len(),
272            ));
273        }
274    }
275
276    // OPTIMIZATION: Direct insertion for small objects (<32 keys),
277    // sorted batch insertion for large objects to minimize rebalancing
278    let mut result = BTreeMap::new();
279
280    if map.len() < 32 {
281        // Small objects: direct insertion is faster than sorting overhead
282        for (key, value) in map {
283            if key.starts_with("__") {
284                continue;
285            }
286            let item = json_value_to_item(value, key, config, structs, schema_cache, depth)?;
287            result.insert(key.clone(), item);
288        }
289    } else {
290        // Large objects: sorted batch insertion reduces BTreeMap rebalancing
291        let mut items: Vec<(String, Item)> = Vec::with_capacity(map.len());
292
293        for (key, value) in map {
294            if key.starts_with("__") {
295                continue;
296            }
297            let item = json_value_to_item(value, key, config, structs, schema_cache, depth)?;
298            items.push((key.clone(), item));
299        }
300
301        // Sort by key for optimal BTreeMap insertion order
302        items.sort_by(|a, b| a.0.cmp(&b.0));
303
304        // Batch insert in sorted order (minimal rebalancing)
305        for (key, item) in items {
306            result.insert(key, item);
307        }
308    }
309
310    Ok(result)
311}
312
313pub(super) fn json_object_to_root(
314    map: &Map<String, JsonValue>,
315    config: &FromJsonConfig,
316    structs: &mut BTreeMap<String, Vec<String>>,
317    schema_cache: &mut SchemaCache,
318    depth: usize,
319) -> Result<BTreeMap<String, Item>, JsonConversionError> {
320    process_json_object_inner(map, config, structs, schema_cache, depth)
321}
322
323/// Process owned JSON object into HEDL item map with zero-copy optimization
324fn json_object_to_root_owned(
325    map: Map<String, JsonValue>,
326    config: &FromJsonConfig,
327    structs: &mut BTreeMap<String, Vec<String>>,
328    schema_cache: &mut SchemaCache,
329    depth: usize,
330) -> Result<BTreeMap<String, Item>, JsonConversionError> {
331    // Check object size limit
332    if let Some(max_size) = config.max_object_size {
333        if map.len() > max_size {
334            return Err(JsonConversionError::MaxObjectSizeExceeded(
335                max_size,
336                map.len(),
337            ));
338        }
339    }
340
341    let mut result = BTreeMap::new();
342
343    for (key, value) in map {
344        // Skip metadata keys
345        if key.starts_with("__") {
346            continue;
347        }
348
349        let item = json_value_to_item_owned(value, &key, config, structs, schema_cache, depth)?;
350        result.insert(key, item);
351    }
352
353    Ok(result)
354}
355
356pub(super) fn json_object_to_item_map(
357    map: &Map<String, JsonValue>,
358    config: &FromJsonConfig,
359    structs: &mut BTreeMap<String, Vec<String>>,
360    schema_cache: &mut SchemaCache,
361    depth: usize,
362) -> Result<BTreeMap<String, Item>, JsonConversionError> {
363    process_json_object_inner(map, config, structs, schema_cache, depth)
364}
365
366pub(super) fn json_value_to_item(
367    value: &JsonValue,
368    key: &str,
369    config: &FromJsonConfig,
370    structs: &mut BTreeMap<String, Vec<String>>,
371    schema_cache: &mut SchemaCache,
372    depth: usize,
373) -> Result<Item, JsonConversionError> {
374    // Check recursion depth
375    if let Some(max_depth) = config.max_depth {
376        if depth >= max_depth {
377            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
378        }
379    }
380
381    match value {
382        JsonValue::Null => Ok(Item::Scalar(Value::Null)),
383        JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(*b))),
384        JsonValue::Number(n) => {
385            let value = json_number_to_value(n)?;
386            Ok(Item::Scalar(value))
387        }
388        JsonValue::String(s) => {
389            // Check string length limit
390            if let Some(max_len) = config.max_string_length {
391                if s.len() > max_len {
392                    return Err(JsonConversionError::MaxStringLengthExceeded(
393                        max_len,
394                        s.len(),
395                    ));
396                }
397            }
398
399            // Check for expression pattern $( ... )
400            if s.starts_with("$(") && s.ends_with(')') {
401                let expr = parse_expression_token(s)
402                    .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
403                Ok(Item::Scalar(Value::Expression(Box::new(expr))))
404            } else {
405                // OPTIMIZATION: Zero-copy string handling
406                // Since serde_json already owns the string, we can move it instead of cloning
407                // when the JSON value is consumed. However, since we're working with &JsonValue,
408                // we need to clone. Use from_json_value_owned() for zero-copy optimization.
409                Ok(Item::Scalar(Value::String(s.clone().into_boxed_str())))
410            }
411        }
412        JsonValue::Array(arr) => {
413            // Check array size limit
414            if let Some(max_size) = config.max_array_size {
415                if arr.len() > max_size {
416                    return Err(JsonConversionError::MaxArraySizeExceeded(
417                        max_size,
418                        arr.len(),
419                    ));
420                }
421            }
422
423            // Handle empty arrays as empty lists
424            if arr.is_empty() {
425                Ok(Item::Scalar(Value::List(Box::default())))
426            } else if is_tensor_array(arr) {
427                // Check if it's a tensor (array of numbers)
428                let tensor = json_array_to_tensor(arr, config, depth + 1)?;
429                Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
430            } else if is_object_array(arr) {
431                // Convert to matrix list
432                let list =
433                    json_array_to_matrix_list(arr, key, config, structs, schema_cache, depth + 1)?;
434                Ok(Item::List(list))
435            } else {
436                // Primitive/mixed array (strings, bools, nulls, or heterogeneous)
437                // Convert to Value::List for non-numeric arrays
438                let list_value = json_array_to_list(arr, config)?;
439                Ok(Item::Scalar(list_value))
440            }
441        }
442        JsonValue::Object(obj) => {
443            // Check for special keys
444            if let Some(JsonValue::String(r)) = obj.get("@ref") {
445                return Ok(Item::Scalar(Value::Reference(
446                    parse_reference(r).map_err(JsonConversionError::InvalidReference)?,
447                )));
448            }
449
450            // Try to convert uniform-schema children to MatrixList
451            if let Some(list) = try_convert_uniform_object_to_matrixlist(
452                obj,
453                key,
454                config,
455                structs,
456                schema_cache,
457                depth + 1,
458            )? {
459                return Ok(Item::List(list));
460            }
461
462            // Regular object - process children recursively
463            let item_map = json_object_to_item_map(obj, config, structs, schema_cache, depth + 1)?;
464            Ok(Item::Object(item_map))
465        }
466    }
467}
468
469/// Convert owned JSON value to HEDL Item with zero-copy string optimization
470fn json_value_to_item_owned(
471    value: JsonValue,
472    key: &str,
473    config: &FromJsonConfig,
474    structs: &mut BTreeMap<String, Vec<String>>,
475    schema_cache: &mut SchemaCache,
476    depth: usize,
477) -> Result<Item, JsonConversionError> {
478    // Check recursion depth
479    if let Some(max_depth) = config.max_depth {
480        if depth >= max_depth {
481            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
482        }
483    }
484
485    match value {
486        JsonValue::Null => Ok(Item::Scalar(Value::Null)),
487        JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(b))),
488        JsonValue::Number(n) => {
489            let value = json_number_to_value(&n)?;
490            Ok(Item::Scalar(value))
491        }
492        JsonValue::String(s) => {
493            // Check string length limit
494            if let Some(max_len) = config.max_string_length {
495                if s.len() > max_len {
496                    return Err(JsonConversionError::MaxStringLengthExceeded(
497                        max_len,
498                        s.len(),
499                    ));
500                }
501            }
502
503            // Check for expression pattern $( ... )
504            if s.starts_with("$(") && s.ends_with(')') {
505                let expr = parse_expression_token(&s)
506                    .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
507                Ok(Item::Scalar(Value::Expression(Box::new(expr))))
508            } else {
509                // ZERO-COPY OPTIMIZATION: Move the string instead of cloning
510                Ok(Item::Scalar(Value::String(s.into_boxed_str())))
511            }
512        }
513        JsonValue::Array(arr) => {
514            // Check array size limit
515            if let Some(max_size) = config.max_array_size {
516                if arr.len() > max_size {
517                    return Err(JsonConversionError::MaxArraySizeExceeded(
518                        max_size,
519                        arr.len(),
520                    ));
521                }
522            }
523
524            // Handle empty arrays as empty lists
525            if arr.is_empty() {
526                Ok(Item::Scalar(Value::List(Box::default())))
527            } else if is_tensor_array(&arr) {
528                // Check if it's a tensor (array of numbers)
529                let tensor = json_array_to_tensor_owned(arr, config, depth + 1)?;
530                Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
531            } else if is_object_array(&arr) {
532                // Convert to matrix list
533                let list =
534                    json_array_to_matrix_list(&arr, key, config, structs, schema_cache, depth + 1)?;
535                Ok(Item::List(list))
536            } else {
537                // Primitive/mixed array (strings, bools, nulls, or heterogeneous)
538                // Convert to Value::List for non-numeric arrays
539                let list_value = json_array_to_list(&arr, config)?;
540                Ok(Item::Scalar(list_value))
541            }
542        }
543        JsonValue::Object(obj) => {
544            // Check for special keys
545            if let Some(JsonValue::String(r)) = obj.get("@ref") {
546                return Ok(Item::Scalar(Value::Reference(
547                    parse_reference(r).map_err(JsonConversionError::InvalidReference)?,
548                )));
549            }
550
551            // Try to convert uniform-schema children to MatrixList
552            if let Some(list) = try_convert_uniform_object_to_matrixlist(
553                &obj,
554                key,
555                config,
556                structs,
557                schema_cache,
558                depth + 1,
559            )? {
560                return Ok(Item::List(list));
561            }
562
563            // Regular object - convert owned map
564            let item_map = json_object_to_item_map(&obj, config, structs, schema_cache, depth + 1)?;
565            Ok(Item::Object(item_map))
566        }
567    }
568}