hedl_json/
from_json.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! JSON to HEDL conversion
19
20use crate::DEFAULT_SCHEMA;
21use hedl_core::convert::parse_reference;
22use hedl_core::lex::Tensor;
23use hedl_core::lex::{parse_expression_token, singularize_and_capitalize};
24use hedl_core::{Document, Item, MatrixList, Node, Value};
25use serde_json::{Map, Value as JsonValue};
26use smallvec::SmallVec;
27use std::collections::{BTreeMap, HashMap};
28
// These four constants seed the security limits in `FromJsonConfig::default`
// and `FromJsonConfigBuilder::default`.

/// Default maximum recursion depth for JSON parsing
///
/// Set to 10,000 levels to handle deeply nested JSON structures.
/// This is significantly higher than typical JSON depth but prevents
/// stack overflow from malicious or malformed inputs.
pub const DEFAULT_MAX_DEPTH: usize = 10_000;

/// Default maximum array size for JSON parsing
///
/// Set to 10,000,000 elements to handle large datasets, including
/// large arrays commonly found in data science and ML applications.
pub const DEFAULT_MAX_ARRAY_SIZE: usize = 10_000_000;

/// Default maximum string length for JSON parsing
///
/// Set to 100 MB (a byte count: 100 * 1024 * 1024) to handle large
/// strings including base64-encoded binary data, large text fields,
/// and embedded documents.
pub const DEFAULT_MAX_STRING_LENGTH: usize = 100 * 1024 * 1024;

/// Default maximum object size (number of keys)
///
/// Set to 100,000 keys to handle objects with many properties,
/// common in configuration files and metadata-rich documents.
pub const DEFAULT_MAX_OBJECT_SIZE: usize = 100_000;
53
/// Policy for handling unpaired UTF-16 surrogates in JSON input
///
/// JSON's `\uXXXX` escapes use UTF-16 encoding. Characters outside the
/// Basic Multilingual Plane (U+10000+, including emoji) require surrogate
/// pairs: a high surrogate (0xD800-0xDBFF) followed immediately by a low
/// surrogate (0xDC00-0xDFFF).
///
/// Some systems (e.g., JavaScript with truncated strings, legacy databases)
/// may emit unpaired surrogates, which are technically invalid Unicode but
/// may appear in real-world data.
///
/// The policy is applied by the preprocessing pass in `from_json`; with
/// `Reject` the input is handed to serde_json unmodified.
///
/// # Example
///
/// ```text
/// use hedl_json::{FromJsonConfig, SurrogatePolicy};
///
/// // Default: reject unpaired surrogates
/// let strict = FromJsonConfig::default();
///
/// // Replace unpaired surrogates with U+FFFD
/// let lenient = FromJsonConfig::builder()
///     .surrogate_policy(SurrogatePolicy::ReplaceWithFFFD)
///     .build();
///
/// // Skip (remove) unpaired surrogates entirely
/// let skip = FromJsonConfig::builder()
///     .surrogate_policy(SurrogatePolicy::Skip)
///     .build();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SurrogatePolicy {
    /// Reject unpaired surrogates with an error (default, strict)
    ///
    /// This is the safest option and ensures all processed JSON contains
    /// valid Unicode. Use this for data integrity requirements.
    #[default]
    Reject,

    /// Replace unpaired surrogates with U+FFFD (replacement character)
    ///
    /// This allows processing of JSON with invalid Unicode while preserving
    /// string structure. The replacement character (�) signals data loss.
    ReplaceWithFFFD,

    /// Skip (remove) unpaired surrogates silently
    ///
    /// Use with caution: this modifies string content without indication.
    /// Suitable when the surrogates are known to be noise or artifacts.
    Skip,
}
104
/// Errors that can occur during JSON to HEDL conversion
#[derive(Debug, Clone, thiserror::Error)]
pub enum JsonConversionError {
    /// JSON parsing failed
    #[error("JSON parse error: {0}")]
    ParseError(String),

    /// Root value must be an object
    #[error("Root must be a JSON object, found {0}")]
    InvalidRoot(String),

    /// Invalid number value
    #[error("Invalid number: {0}")]
    InvalidNumber(String),

    /// Invalid expression syntax
    #[error("Invalid expression: {0}")]
    InvalidExpression(String),

    /// Invalid tensor element
    #[error("Invalid tensor element - must be number or array")]
    InvalidTensor,

    /// Nested objects not allowed in scalar context
    #[error("Nested objects not allowed in scalar context")]
    NestedObject,

    /// Reference parsing failed
    #[error("Invalid reference: {0}")]
    InvalidReference(String),

    /// Invalid Unicode encoding
    ///
    /// This error occurs when JSON contains invalid Unicode sequences, such as:
    /// - Unpaired UTF-16 surrogates (`\uD83D` without its low surrogate pair)
    /// - Invalid surrogate pairs (low surrogate before high surrogate)
    /// - Unescaped control characters in strings
    ///
    /// # UTF-16 Surrogate Background
    ///
    /// JSON's `\uXXXX` escapes use UTF-16 encoding. Characters outside the
    /// Basic Multilingual Plane (U+10000 and above, including emoji) require
    /// surrogate pairs: a high surrogate (0xD800-0xDBFF) followed by a low
    /// surrogate (0xDC00-0xDFFF).
    ///
    /// # Solutions
    ///
    /// 1. **Use the `SurrogatePolicy::ReplaceWithFFFD` option**:
    ///    Replace invalid surrogates with the Unicode replacement character.
    ///
    /// 2. **Preprocess the JSON** to fix or remove invalid sequences.
    ///
    /// 3. **Ensure the source system** produces valid UTF-8/UTF-16 pairs.
    #[error("Invalid Unicode: {0}")]
    InvalidUnicode(String),

    /// Maximum recursion depth exceeded
    #[error("Maximum recursion depth ({0}) exceeded - possible deeply nested structure")]
    MaxDepthExceeded(usize),

    /// Maximum array size exceeded
    #[error("Maximum array size ({0}) exceeded - array has {1} elements")]
    MaxArraySizeExceeded(usize, usize),

    /// Maximum string length exceeded
    ///
    /// NOTE(review): `FromJsonConfigBuilder::max_string_length` documents the
    /// limit in bytes, while this message says "characters" - confirm which
    /// unit the enforcement site actually counts (enforcement not visible here).
    #[error("Maximum string length ({0}) exceeded - string has {1} characters")]
    MaxStringLengthExceeded(usize, usize),

    /// Maximum object size exceeded
    #[error("Maximum object size ({0}) exceeded - object has {1} keys")]
    MaxObjectSizeExceeded(usize, usize),

    /// Integer value outside i64 range
    ///
    /// JSON supports arbitrary-precision numbers, but HEDL's `Value::Int`
    /// uses `i64` which has a fixed range: -9,223,372,036,854,775,808 to
    /// 9,223,372,036,854,775,807.
    ///
    /// # Common Causes
    ///
    /// - Twitter/Snowflake IDs (often exceed `i64::MAX`)
    /// - Unsigned 64-bit integers from other systems
    /// - Large database auto-increment IDs
    /// - Timestamps in nanoseconds beyond year 2262
    ///
    /// # Solutions
    ///
    /// 1. **Use strings for large IDs** (recommended):
    ///    ```json
    ///    {"tweet_id": "18446744073709551615"}
    ///    ```
    ///
    /// 2. **Use hex encoding**:
    ///    ```json
    ///    {"large_number": "0xFFFFFFFFFFFFFFFF"}
    ///    ```
    ///
    /// 3. **Split into high/low parts**:
    ///    ```json
    ///    {"value_high": 1844674407, "value_low": 3709551615}
    ///    ```
    ///
    /// # Examples
    ///
    /// ```
    /// use hedl_json::{from_json, FromJsonConfig};
    ///
    /// let json = r#"{"id": 18446744073709551615}"#;
    /// let result = from_json(json, &FromJsonConfig::default());
    ///
    /// assert!(result.is_err());
    /// assert!(result.unwrap_err().to_string().contains("Integer overflow"));
    /// ```
    #[error(
        "Integer overflow: {value} exceeds i64 range [{min}..{max}]. \
         Consider using a string for large IDs or timestamps."
    )]
    IntegerOverflow {
        /// String representation of the overflowing value.
        value: String,
        /// Maximum valid i64 value.
        max: i64,
        /// Minimum valid i64 value.
        min: i64,
    },
}
231
232impl From<serde_json::Error> for JsonConversionError {
233    fn from(err: serde_json::Error) -> Self {
234        let msg = err.to_string();
235
236        // Detect surrogate-related errors from serde_json
237        if msg.contains("lone surrogate")
238            || msg.contains("surrogate")
239            || msg.contains("invalid unicode")
240        {
241            JsonConversionError::InvalidUnicode(format!(
242                "Invalid UTF-16 surrogate sequence: {msg}. \
243                 JSON contains unpaired surrogates which cannot be represented \
244                 in Rust UTF-8 strings. Configure SurrogatePolicy::ReplaceWithFFFD \
245                 to replace with the Unicode replacement character (U+FFFD)."
246            ))
247        } else if msg.contains("control character") {
248            JsonConversionError::InvalidUnicode(format!(
249                "Unescaped control character in JSON string: {msg}. \
250                 Control characters (U+0000-U+001F) must be escaped as \\uXXXX \
251                 per RFC 8259."
252            ))
253        } else {
254            JsonConversionError::ParseError(msg)
255        }
256    }
257}
258
259/// Check if a `serde_json::Number` represents an integer outside i64 range
260///
261/// Returns `true` if the number is an integer (not a float) but cannot
262/// fit in i64 range. This happens when:
263/// - The value is larger than `i64::MAX` (9,223,372,036,854,775,807)
264/// - The value is smaller than `i64::MIN` (-9,223,372,036,854,775,808)
265///
266/// # Implementation Note
267///
268/// `serde_json::Number::as_i64()` returns `None` for both:
269/// 1. Numbers outside i64 range (overflow)
270/// 2. Floating point numbers
271///
272/// We use `as_u64()` to detect case 1: if `as_i64()` fails but `as_u64()`
273/// succeeds, the number is an unsigned integer too large for i64.
274/// We also check `is_i64()` to catch negative overflow cases.
275#[inline]
276fn is_integer_overflow(n: &serde_json::Number) -> bool {
277    // If as_i64() fails but as_u64() succeeds, it's an unsigned int overflow
278    // Or if is_i64() is true but as_i64() is None, it's a signed int overflow
279    n.as_i64().is_none() && (n.as_u64().is_some() || n.is_i64())
280}
281
282/// Convert JSON number to HEDL Value with overflow detection
283///
284/// This function enforces strict integer validation to prevent silent
285/// precision loss from i64 overflow converting to f64.
286///
287/// # Behavior
288///
289/// 1. **i64 range integers**: Convert to `Value::Int(i64)`
290/// 2. **Overflow integers**: Return `IntegerOverflow` error
291/// 3. **Floating point**: Convert to `Value::Float(f64)`
292///
293/// # Implementation Details
294///
295/// - Valid i64 values are converted to `Value::Int`
296/// - Integer values outside i64 range trigger `IntegerOverflow` error
297/// - Floating point values are converted to `Value::Float`
298/// - Uses fast-path optimization for common i64 case
299#[inline]
300fn json_number_to_value(n: &serde_json::Number) -> Result<Value, JsonConversionError> {
301    // Try i64 first (most common case - fast path)
302    if let Some(i) = n.as_i64() {
303        return Ok(Value::Int(i));
304    }
305
306    // Check for integer overflow
307    if is_integer_overflow(n) {
308        return Err(JsonConversionError::IntegerOverflow {
309            value: n.to_string(),
310            max: i64::MAX,
311            min: i64::MIN,
312        });
313    }
314
315    // Must be a float
316    if let Some(f) = n.as_f64() {
317        Ok(Value::Float(f))
318    } else {
319        // Should never happen with valid JSON
320        Err(JsonConversionError::InvalidNumber(n.to_string()))
321    }
322}
323
/// Configuration for JSON import
///
/// Controls how JSON is converted to HEDL, including security limits
/// to prevent denial-of-service attacks from malicious inputs.
///
/// # High Default Limits
///
/// The default limits are set intentionally high to handle large-scale
/// data processing scenarios common in ML/AI applications:
///
/// - **10,000 depth**: Deep nesting in complex hierarchical data
/// - **10,000,000 array size**: Large datasets and batches
/// - **100 MB string length**: Base64-encoded binary data, embeddings
/// - **100,000 object size**: Rich metadata and configuration objects
///
/// These defaults prioritize functionality over restrictiveness. For
/// untrusted input, consider using the builder pattern with custom limits.
///
/// # Examples
///
/// ```text
/// use hedl_json::FromJsonConfig;
///
/// // Default configuration with high limits for ML/data workloads
/// let config = FromJsonConfig::default();
///
/// // Custom configuration using builder pattern
/// let custom_config = FromJsonConfig::builder()
///     .max_depth(1_000)
///     .max_array_size(100_000)
///     .max_string_length(10 * 1024 * 1024) // 10 MB
///     .build();
///
/// // Strict configuration for untrusted input
/// let strict_config = FromJsonConfig::builder()
///     .max_depth(50)
///     .max_array_size(10_000)
///     .max_string_length(1_000_000)
///     .max_object_size(1_000)
///     .build();
///
/// // Unlimited configuration (use with caution)
/// let unlimited_config = FromJsonConfig::builder()
///     .unlimited()
///     .build();
/// ```
#[derive(Debug, Clone)]
pub struct FromJsonConfig {
    /// Default type name for arrays without metadata
    pub default_type_name: String,

    /// HEDL version to use
    pub version: (u32, u32),

    /// Maximum recursion depth (default: 10,000)
    ///
    /// Prevents stack overflow from deeply nested JSON structures.
    /// Set to `None` to disable (not recommended for untrusted input).
    pub max_depth: Option<usize>,

    /// Maximum array size (default: 10,000,000)
    ///
    /// Prevents memory exhaustion from extremely large arrays.
    /// JSON arrays can contain large datasets, batches, or embeddings.
    /// Set to `None` to disable (not recommended for untrusted input).
    pub max_array_size: Option<usize>,

    /// Maximum string length (default: 100 MB)
    ///
    /// Prevents memory exhaustion from extremely large strings.
    /// JSON strings often contain base64-encoded binary data, large
    /// text fields, or embedded documents requiring high limits.
    /// The default (`DEFAULT_MAX_STRING_LENGTH`) is a byte count.
    /// Set to `None` to disable (not recommended for untrusted input).
    pub max_string_length: Option<usize>,

    /// Maximum object size (default: 100,000)
    ///
    /// Prevents memory exhaustion from objects with many keys.
    /// Configuration files and metadata-rich objects can have many properties.
    /// Set to `None` to disable (not recommended for untrusted input).
    pub max_object_size: Option<usize>,

    /// Policy for handling unpaired UTF-16 surrogates
    ///
    /// Some systems emit JSON with unpaired surrogates (e.g., truncated
    /// JavaScript strings). This setting controls how to handle them.
    ///
    /// Default: `SurrogatePolicy::Reject` (strict validation)
    pub surrogate_policy: SurrogatePolicy,

    /// Enable lenient JSON parsing (JSON5-style trailing commas and comments)
    ///
    /// When enabled, the parser accepts:
    /// - Trailing commas in arrays and objects
    /// - Single-line (//) and multi-line (/* */) comments
    ///
    /// Requires the `lenient` feature flag.
    ///
    /// Default: false (strict RFC 8259 JSON)
    #[cfg(feature = "lenient")]
    pub lenient: bool,
}
426
427impl Default for FromJsonConfig {
428    fn default() -> Self {
429        Self {
430            default_type_name: "Item".to_string(),
431            version: (1, 0),
432            max_depth: Some(DEFAULT_MAX_DEPTH),
433            max_array_size: Some(DEFAULT_MAX_ARRAY_SIZE),
434            max_string_length: Some(DEFAULT_MAX_STRING_LENGTH),
435            max_object_size: Some(DEFAULT_MAX_OBJECT_SIZE),
436            surrogate_policy: SurrogatePolicy::default(),
437            #[cfg(feature = "lenient")]
438            lenient: false,
439        }
440    }
441}
442
443impl FromJsonConfig {
444    /// Create a new builder for configuring JSON import
445    ///
446    /// # Examples
447    ///
448    /// ```text
449    /// use hedl_json::FromJsonConfig;
450    ///
451    /// let config = FromJsonConfig::builder()
452    ///     .max_depth(1_000)
453    ///     .max_array_size(100_000)
454    ///     .build();
455    /// ```
456    #[must_use]
457    pub fn builder() -> FromJsonConfigBuilder {
458        FromJsonConfigBuilder::default()
459    }
460}
461
impl hedl_core::convert::ImportConfig for FromJsonConfig {
    // Type name applied to arrays that carry no explicit type metadata.
    fn default_type_name(&self) -> &str {
        &self.default_type_name
    }

    // (major, minor) HEDL version pair recorded in converted documents.
    fn version(&self) -> (u32, u32) {
        self.version
    }
}
471
/// Builder for `FromJsonConfig`
///
/// Provides ergonomic configuration of JSON import limits and behavior.
///
/// # Examples
///
/// ```text
/// use hedl_json::FromJsonConfig;
///
/// // Custom limits
/// let config = FromJsonConfig::builder()
///     .max_depth(1_000)
///     .max_array_size(100_000)
///     .max_string_length(10 * 1024 * 1024)
///     .build();
///
/// // Strict limits for untrusted input
/// let strict = FromJsonConfig::builder()
///     .max_depth(50)
///     .max_array_size(10_000)
///     .max_string_length(1_000_000)
///     .max_object_size(1_000)
///     .build();
///
/// // Unlimited (use with caution!)
/// let unlimited = FromJsonConfig::builder()
///     .unlimited()
///     .build();
/// ```
#[derive(Debug, Clone)]
pub struct FromJsonConfigBuilder {
    // Fields mirror `FromJsonConfig` one-to-one; `build()` copies each
    // across verbatim.
    default_type_name: String,
    version: (u32, u32),
    max_depth: Option<usize>,
    max_array_size: Option<usize>,
    max_string_length: Option<usize>,
    max_object_size: Option<usize>,
    surrogate_policy: SurrogatePolicy,
    #[cfg(feature = "lenient")]
    lenient: bool,
}
513
impl Default for FromJsonConfigBuilder {
    /// Canonical default values for JSON import configuration.
    ///
    /// Mirrors the documented `DEFAULT_*` constants, with strict surrogate
    /// rejection and (when the `lenient` feature is enabled) strict RFC 8259
    /// parsing.
    fn default() -> Self {
        Self {
            default_type_name: "Item".to_string(),
            version: (1, 0),
            max_depth: Some(DEFAULT_MAX_DEPTH),
            max_array_size: Some(DEFAULT_MAX_ARRAY_SIZE),
            max_string_length: Some(DEFAULT_MAX_STRING_LENGTH),
            max_object_size: Some(DEFAULT_MAX_OBJECT_SIZE),
            surrogate_policy: SurrogatePolicy::default(),
            #[cfg(feature = "lenient")]
            lenient: false,
        }
    }
}
529
530impl FromJsonConfigBuilder {
531    /// Set the default type name for arrays without metadata
532    pub fn default_type_name(mut self, name: impl Into<String>) -> Self {
533        self.default_type_name = name.into();
534        self
535    }
536
537    /// Set the HEDL version to use
538    #[must_use]
539    pub fn version(mut self, major: u32, minor: u32) -> Self {
540        self.version = (major, minor);
541        self
542    }
543
544    /// Set the maximum recursion depth
545    ///
546    /// Use `None` to disable the limit (not recommended for untrusted input).
547    #[must_use]
548    pub fn max_depth(mut self, limit: usize) -> Self {
549        self.max_depth = Some(limit);
550        self
551    }
552
553    /// Set the maximum array size
554    ///
555    /// Use `None` to disable the limit (not recommended for untrusted input).
556    #[must_use]
557    pub fn max_array_size(mut self, limit: usize) -> Self {
558        self.max_array_size = Some(limit);
559        self
560    }
561
562    /// Set the maximum string length in bytes
563    ///
564    /// Use `None` to disable the limit (not recommended for untrusted input).
565    #[must_use]
566    pub fn max_string_length(mut self, limit: usize) -> Self {
567        self.max_string_length = Some(limit);
568        self
569    }
570
571    /// Set the maximum object size (number of keys)
572    ///
573    /// Use `None` to disable the limit (not recommended for untrusted input).
574    #[must_use]
575    pub fn max_object_size(mut self, limit: usize) -> Self {
576        self.max_object_size = Some(limit);
577        self
578    }
579
580    /// Set the policy for handling unpaired UTF-16 surrogates
581    ///
582    /// # Options
583    ///
584    /// - `SurrogatePolicy::Reject` (default): Error on invalid surrogates
585    /// - `SurrogatePolicy::ReplaceWithFFFD`: Replace with U+FFFD
586    /// - `SurrogatePolicy::Skip`: Remove invalid surrogates silently
587    ///
588    /// # Example
589    ///
590    /// ```text
591    /// use hedl_json::{FromJsonConfig, SurrogatePolicy};
592    ///
593    /// let config = FromJsonConfig::builder()
594    ///     .surrogate_policy(SurrogatePolicy::ReplaceWithFFFD)
595    ///     .build();
596    /// ```
597    #[must_use]
598    pub fn surrogate_policy(mut self, policy: SurrogatePolicy) -> Self {
599        self.surrogate_policy = policy;
600        self
601    }
602
603    /// Disable all limits (use with caution - only for trusted input)
604    ///
605    /// This removes all safety limits and can lead to memory exhaustion
606    /// or stack overflow with malicious or malformed JSON.
607    #[must_use]
608    pub fn unlimited(mut self) -> Self {
609        self.max_depth = None;
610        self.max_array_size = None;
611        self.max_string_length = None;
612        self.max_object_size = None;
613        self
614    }
615
616    /// Enable lenient JSON parsing (trailing commas, comments)
617    ///
618    /// When enabled, the parser accepts:
619    /// - Trailing commas in arrays and objects
620    /// - Single-line (//) and multi-line (/* */) comments
621    ///
622    /// Requires the `lenient` feature flag.
623    ///
624    /// # Examples
625    ///
626    /// ```text
627    /// use hedl_json::FromJsonConfig;
628    ///
629    /// let config = FromJsonConfig::builder()
630    ///     .lenient(true)
631    ///     .build();
632    ///
633    /// // Now you can parse JSON with trailing commas
634    /// let json = r#"{"name": "Alice", "age": 30,}"#;
635    /// ```
636    #[cfg(feature = "lenient")]
637    #[must_use]
638    pub fn lenient(mut self, lenient: bool) -> Self {
639        self.lenient = lenient;
640        self
641    }
642
643    /// Build the configuration
644    #[must_use]
645    pub fn build(self) -> FromJsonConfig {
646        FromJsonConfig {
647            default_type_name: self.default_type_name,
648            version: self.version,
649            max_depth: self.max_depth,
650            max_array_size: self.max_array_size,
651            max_string_length: self.max_string_length,
652            max_object_size: self.max_object_size,
653            surrogate_policy: self.surrogate_policy,
654            #[cfg(feature = "lenient")]
655            lenient: self.lenient,
656        }
657    }
658}
659
/// Schema cache for avoiding redundant schema inference
///
/// When converting large JSON arrays to matrix lists, we often encounter the same
/// structure repeatedly. Caching the inferred schema significantly improves performance
/// by avoiding redundant key iteration and sorting.
///
/// NOTE(review): the cache's consumers (`json_object_to_root` and friends) are
/// partially outside this view - presumably the key is an object's key list and
/// the value the inferred column order; confirm at the call sites.
///
/// # Performance Impact
///
/// - First schema inference: ~O(n*log(n)) where n is number of keys
/// - Cached lookup: ~O(1) hash map lookup
/// - Expected speedup: 30-50% for documents with repeated array structures
type SchemaCache = HashMap<Vec<String>, Vec<String>>;
672
673/// Preprocess JSON string to handle unpaired UTF-16 surrogates
674///
675/// This function scans the JSON for `\uXXXX` escape sequences and applies
676/// the configured surrogate policy to any unpaired surrogates found.
677///
678/// # Surrogate Pairs
679///
680/// UTF-16 surrogates are used for characters above U+FFFF (emoji, etc.):
681/// - High surrogates: 0xD800-0xDBFF
682/// - Low surrogates: 0xDC00-0xDFFF
683///
684/// A valid pair is a high surrogate immediately followed by a low surrogate.
685/// An "unpaired" surrogate is one without its matching partner.
686fn preprocess_json_for_surrogates(
687    json: &str,
688    policy: SurrogatePolicy,
689) -> Result<String, JsonConversionError> {
690    if policy == SurrogatePolicy::Reject {
691        // Let serde_json handle rejection with its native error messages
692        return Ok(json.to_string());
693    }
694
695    let bytes = json.as_bytes();
696    let mut result = String::with_capacity(json.len());
697    let mut i = 0;
698
699    while i < bytes.len() {
700        // Look for backslash followed by 'u'
701        if i + 5 < bytes.len() && bytes[i] == b'\\' && bytes[i + 1] == b'u' {
702            // Try to parse the 4 hex digits
703            if let Some(code) = parse_unicode_escape(&bytes[i + 2..i + 6]) {
704                let is_high_surrogate = (0xD800..=0xDBFF).contains(&code);
705                let is_low_surrogate = (0xDC00..=0xDFFF).contains(&code);
706
707                if is_high_surrogate {
708                    // Check if followed by a valid low surrogate
709                    let has_low_pair = i + 11 < bytes.len()
710                        && bytes[i + 6] == b'\\'
711                        && bytes[i + 7] == b'u'
712                        && parse_unicode_escape(&bytes[i + 8..i + 12])
713                            .is_some_and(|low| (0xDC00..=0xDFFF).contains(&low));
714
715                    if has_low_pair {
716                        // Valid surrogate pair - copy both escapes
717                        result.push_str(&json[i..i + 12]);
718                        i += 12;
719                        continue;
720                    }
721                    // Unpaired high surrogate
722                    match policy {
723                        SurrogatePolicy::ReplaceWithFFFD => {
724                            result.push_str("\\uFFFD");
725                        }
726                        SurrogatePolicy::Skip => {
727                            // Skip the escape sequence entirely
728                        }
729                        SurrogatePolicy::Reject => unreachable!(),
730                    }
731                    i += 6;
732                    continue;
733                } else if is_low_surrogate {
734                    // Low surrogate without preceding high - always unpaired
735                    match policy {
736                        SurrogatePolicy::ReplaceWithFFFD => {
737                            result.push_str("\\uFFFD");
738                        }
739                        SurrogatePolicy::Skip => {
740                            // Skip the escape sequence entirely
741                        }
742                        SurrogatePolicy::Reject => unreachable!(),
743                    }
744                    i += 6;
745                    continue;
746                }
747            }
748        }
749
750        // Copy current byte as-is
751        result.push(json[i..].chars().next().unwrap());
752        i += json[i..].chars().next().unwrap().len_utf8();
753    }
754
755    Ok(result)
756}
757
/// Parse 4 hex digits into a u16 value
///
/// Returns `None` when fewer than four bytes are supplied or when any of
/// the first four bytes is not an ASCII hex digit (upper- or lowercase).
/// Bytes beyond the first four are ignored.
#[inline]
fn parse_unicode_escape(bytes: &[u8]) -> Option<u16> {
    bytes.get(..4)?.iter().try_fold(0u16, |acc, &b| {
        // `to_digit(16)` accepts exactly 0-9, a-f, A-F - no sign or
        // whitespace - matching a strict hex-digit check.
        let digit = (b as char).to_digit(16)?;
        // Four hex digits max out at 0xFFFF, so this cannot overflow u16.
        Some(acc * 16 + digit as u16)
    })
}
777
778/// Convert JSON string to HEDL Document
779///
780/// # Arguments
781///
782/// * `json` - JSON string to parse
783/// * `config` - Configuration for import behavior and security limits
784///
785/// # Returns
786///
787/// * `Ok(Document)` - Successfully parsed HEDL document
788/// * `Err(JsonConversionError)` - Parsing or validation error
789///
790/// # Examples
791///
792/// ```text
793/// use hedl_json::{from_json, FromJsonConfig};
794///
795/// let json = r#"{"name": "Alice", "age": 30}"#;
796/// let config = FromJsonConfig::default();
797/// let doc = from_json(json, &config).unwrap();
798/// ```
799pub fn from_json(json: &str, config: &FromJsonConfig) -> Result<Document, JsonConversionError> {
800    // Preprocess for surrogate handling if policy is not Reject
801    let processed = preprocess_json_for_surrogates(json, config.surrogate_policy)?;
802    let json_to_parse = if config.surrogate_policy == SurrogatePolicy::Reject {
803        json
804    } else {
805        &processed
806    };
807
808    #[cfg(feature = "lenient")]
809    let value: JsonValue = if config.lenient {
810        serde_jsonrc::from_str(json_to_parse)
811            .map_err(|e| JsonConversionError::ParseError(e.to_string()))?
812    } else {
813        serde_json::from_str(json_to_parse)?
814    };
815
816    #[cfg(not(feature = "lenient"))]
817    let value: JsonValue = serde_json::from_str(json_to_parse)?;
818
819    from_json_value(&value, config)
820}
821
822/// Convert `serde_json::Value` to HEDL Document
823///
824/// # Arguments
825///
826/// * `value` - Parsed JSON value (must be an object)
827/// * `config` - Configuration for import behavior and security limits
828///
829/// # Returns
830///
831/// * `Ok(Document)` - Successfully converted HEDL document
832/// * `Err(JsonConversionError)` - Validation error
833///
834/// # Examples
835///
836/// ```text
837/// use hedl_json::{from_json_value, FromJsonConfig};
838/// use serde_json::json;
839///
840/// let value = json!({"users": [{"id": "alice"}]});
841/// let config = FromJsonConfig::default();
842/// let doc = from_json_value(&value, &config).unwrap();
843/// ```
844pub fn from_json_value(
845    value: &JsonValue,
846    config: &FromJsonConfig,
847) -> Result<Document, JsonConversionError> {
848    let mut structs = BTreeMap::new();
849    let mut schema_cache = SchemaCache::new();
850    let root = match value {
851        JsonValue::Object(map) => {
852            json_object_to_root(map, config, &mut structs, &mut schema_cache, 0)?
853        }
854        _ => return Err(JsonConversionError::InvalidRoot(format!("{value:?}"))),
855    };
856
857    Ok(Document {
858        version: config.version,
859        schema_versions: BTreeMap::new(),
860        aliases: BTreeMap::new(),
861        structs,
862        nests: BTreeMap::new(),
863        root,
864    })
865}
866
867/// Convert owned `serde_json::Value` to HEDL Document with zero-copy optimization
868///
869/// This version accepts an owned `JsonValue` which allows for zero-copy string handling
870/// by moving strings instead of cloning them.
871///
872/// # Arguments
873///
874/// * `value` - Owned parsed JSON value (must be an object)
875/// * `config` - Configuration for import behavior and security limits
876///
877/// # Returns
878///
879/// * `Ok(Document)` - Successfully converted HEDL document
880/// * `Err(JsonConversionError)` - Validation error
881///
882/// # Performance
883///
884/// This function is optimized for reduced memory allocations by moving strings
885/// from the JSON value instead of cloning them. For large documents with many
886/// strings, this can reduce allocations by 30-50%.
887///
888/// # Examples
889///
890/// ```text
891/// use hedl_json::{from_json_value_owned, FromJsonConfig};
892/// use serde_json::json;
893///
894/// let value = json!({"users": [{"id": "alice"}]});
895/// let config = FromJsonConfig::default();
896/// let doc = from_json_value_owned(value, &config).unwrap();
897/// ```
898pub fn from_json_value_owned(
899    value: JsonValue,
900    config: &FromJsonConfig,
901) -> Result<Document, JsonConversionError> {
902    let mut structs = BTreeMap::new();
903    let mut schema_cache = SchemaCache::new();
904    let root = match value {
905        JsonValue::Object(map) => {
906            json_object_to_root_owned(map, config, &mut structs, &mut schema_cache, 0)?
907        }
908        _ => {
909            return Err(JsonConversionError::InvalidRoot(
910                "Root must be an object".to_string(),
911            ))
912        }
913    };
914
915    Ok(Document {
916        version: config.version,
917        schema_versions: BTreeMap::new(),
918        aliases: BTreeMap::new(),
919        structs,
920        nests: BTreeMap::new(),
921        root,
922    })
923}
924
925/// Process JSON object into HEDL item map, skipping metadata keys.
926/// This is the shared implementation used by both root and nested objects.
927///
928/// # Performance Optimization
929///
930/// Pre-allocates `BTreeMap` capacity to reduce allocation churn during object construction.
931/// Based on profiling, this reduces allocations by approximately 15-20% for object-heavy JSON.
932fn process_json_object_inner(
933    map: &Map<String, JsonValue>,
934    config: &FromJsonConfig,
935    structs: &mut BTreeMap<String, Vec<String>>,
936    schema_cache: &mut SchemaCache,
937    depth: usize,
938) -> Result<BTreeMap<String, Item>, JsonConversionError> {
939    // Check object size limit
940    if let Some(max_size) = config.max_object_size {
941        if map.len() > max_size {
942            return Err(JsonConversionError::MaxObjectSizeExceeded(
943                max_size,
944                map.len(),
945            ));
946        }
947    }
948
949    // OPTIMIZATION: Direct insertion for small objects (<32 keys),
950    // sorted batch insertion for large objects to minimize rebalancing
951    let mut result = BTreeMap::new();
952
953    if map.len() < 32 {
954        // Small objects: direct insertion is faster than sorting overhead
955        for (key, value) in map {
956            if key.starts_with("__") {
957                continue;
958            }
959            let item = json_value_to_item(value, key, config, structs, schema_cache, depth)?;
960            result.insert(key.clone(), item);
961        }
962    } else {
963        // Large objects: sorted batch insertion reduces BTreeMap rebalancing
964        let mut items: Vec<(String, Item)> = Vec::with_capacity(map.len());
965
966        for (key, value) in map {
967            if key.starts_with("__") {
968                continue;
969            }
970            let item = json_value_to_item(value, key, config, structs, schema_cache, depth)?;
971            items.push((key.clone(), item));
972        }
973
974        // Sort by key for optimal BTreeMap insertion order
975        items.sort_by(|a, b| a.0.cmp(&b.0));
976
977        // Batch insert in sorted order (minimal rebalancing)
978        for (key, item) in items {
979            result.insert(key, item);
980        }
981    }
982
983    Ok(result)
984}
985
/// Convert the top-level JSON object into the HEDL root item map.
///
/// Thin wrapper over `process_json_object_inner`; today it is identical
/// to `json_object_to_item_map`, but root and nested objects are kept as
/// distinct entry points.
fn json_object_to_root(
    map: &Map<String, JsonValue>,
    config: &FromJsonConfig,
    structs: &mut BTreeMap<String, Vec<String>>,
    schema_cache: &mut SchemaCache,
    depth: usize,
) -> Result<BTreeMap<String, Item>, JsonConversionError> {
    process_json_object_inner(map, config, structs, schema_cache, depth)
}
995
996/// Process owned JSON object into HEDL item map with zero-copy optimization
997fn json_object_to_root_owned(
998    map: Map<String, JsonValue>,
999    config: &FromJsonConfig,
1000    structs: &mut BTreeMap<String, Vec<String>>,
1001    schema_cache: &mut SchemaCache,
1002    depth: usize,
1003) -> Result<BTreeMap<String, Item>, JsonConversionError> {
1004    // Check object size limit
1005    if let Some(max_size) = config.max_object_size {
1006        if map.len() > max_size {
1007            return Err(JsonConversionError::MaxObjectSizeExceeded(
1008                max_size,
1009                map.len(),
1010            ));
1011        }
1012    }
1013
1014    let mut result = BTreeMap::new();
1015
1016    for (key, value) in map {
1017        // Skip metadata keys
1018        if key.starts_with("__") {
1019            continue;
1020        }
1021
1022        let item = json_value_to_item_owned(value, &key, config, structs, schema_cache, depth)?;
1023        result.insert(key, item);
1024    }
1025
1026    Ok(result)
1027}
1028
/// Convert a nested JSON object into a HEDL item map.
///
/// Thin wrapper over `process_json_object_inner`; today it is identical
/// to `json_object_to_root`, but root and nested objects are kept as
/// distinct entry points.
fn json_object_to_item_map(
    map: &Map<String, JsonValue>,
    config: &FromJsonConfig,
    structs: &mut BTreeMap<String, Vec<String>>,
    schema_cache: &mut SchemaCache,
    depth: usize,
) -> Result<BTreeMap<String, Item>, JsonConversionError> {
    process_json_object_inner(map, config, structs, schema_cache, depth)
}
1038
/// Convert a borrowed JSON value into a HEDL [`Item`].
///
/// Mapping rules (order matters for arrays):
/// - null / bool / number → scalar values (numbers via `json_number_to_value`)
/// - strings shaped `$( ... )` → parsed expressions; other strings → owned strings
/// - empty arrays → empty matrix list with the default schema
/// - arrays of numbers / nested arrays → tensors
/// - arrays of objects → matrix lists (`key` names the element type)
/// - objects containing an `"@ref"` string → references; other objects → item maps
///
/// # Errors
///
/// Returns an error when a configured limit (`max_depth`,
/// `max_string_length`, `max_array_size`) is exceeded, or when an
/// expression, reference, number, or tensor fails to parse.
fn json_value_to_item(
    value: &JsonValue,
    key: &str,
    config: &FromJsonConfig,
    structs: &mut BTreeMap<String, Vec<String>>,
    schema_cache: &mut SchemaCache,
    depth: usize,
) -> Result<Item, JsonConversionError> {
    // Check recursion depth
    if let Some(max_depth) = config.max_depth {
        if depth >= max_depth {
            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
        }
    }

    match value {
        JsonValue::Null => Ok(Item::Scalar(Value::Null)),
        JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(*b))),
        JsonValue::Number(n) => {
            let value = json_number_to_value(n)?;
            Ok(Item::Scalar(value))
        }
        JsonValue::String(s) => {
            // Check string length limit
            if let Some(max_len) = config.max_string_length {
                if s.len() > max_len {
                    return Err(JsonConversionError::MaxStringLengthExceeded(
                        max_len,
                        s.len(),
                    ));
                }
            }

            // Check for expression pattern $( ... )
            if s.starts_with("$(") && s.ends_with(')') {
                let expr = parse_expression_token(s)
                    .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
                Ok(Item::Scalar(Value::Expression(Box::new(expr))))
            } else {
                // This borrowed path must clone the string; use
                // from_json_value_owned() / json_value_to_item_owned() for the
                // zero-copy path that moves strings instead.
                Ok(Item::Scalar(Value::String(s.clone().into_boxed_str())))
            }
        }
        JsonValue::Array(arr) => {
            // Check array size limit
            if let Some(max_size) = config.max_array_size {
                if arr.len() > max_size {
                    return Err(JsonConversionError::MaxArraySizeExceeded(
                        max_size,
                        arr.len(),
                    ));
                }
            }

            // Handle empty arrays as empty matrix lists (the type name and
            // default schema are still registered so the struct exists).
            if arr.is_empty() {
                let type_name = singularize_and_capitalize(key);
                let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
                let mut list = MatrixList::new(type_name.clone(), schema.clone());
                list.count_hint = Some(0);
                structs.insert(type_name, schema);
                Ok(Item::List(list))
            } else if is_tensor_array(arr) {
                // Homogeneous numbers / nested arrays → tensor
                let tensor = json_array_to_tensor(arr, config, depth + 1)?;
                Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
            } else if is_object_array(arr) {
                // Homogeneous objects → matrix list
                let list =
                    json_array_to_matrix_list(arr, key, config, structs, schema_cache, depth + 1)?;
                Ok(Item::List(list))
            } else {
                // Mixed array - attempt tensor conversion; non-numeric
                // elements will surface as InvalidTensor.
                let tensor = json_array_to_tensor(arr, config, depth + 1)?;
                Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
            }
        }
        JsonValue::Object(obj) => {
            // {"@ref": "..."} objects encode references; any other keys on
            // such an object are ignored.
            if let Some(JsonValue::String(r)) = obj.get("@ref") {
                return Ok(Item::Scalar(Value::Reference(
                    parse_reference(r).map_err(JsonConversionError::InvalidReference)?,
                )));
            }
            // Regular object
            let item_map = json_object_to_item_map(obj, config, structs, schema_cache, depth + 1)?;
            Ok(Item::Object(item_map))
        }
    }
}
1132
1133/// Convert owned JSON value to HEDL Item with zero-copy string optimization
1134fn json_value_to_item_owned(
1135    value: JsonValue,
1136    key: &str,
1137    config: &FromJsonConfig,
1138    structs: &mut BTreeMap<String, Vec<String>>,
1139    schema_cache: &mut SchemaCache,
1140    depth: usize,
1141) -> Result<Item, JsonConversionError> {
1142    // Check recursion depth
1143    if let Some(max_depth) = config.max_depth {
1144        if depth >= max_depth {
1145            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1146        }
1147    }
1148
1149    match value {
1150        JsonValue::Null => Ok(Item::Scalar(Value::Null)),
1151        JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(b))),
1152        JsonValue::Number(n) => {
1153            let value = json_number_to_value(&n)?;
1154            Ok(Item::Scalar(value))
1155        }
1156        JsonValue::String(s) => {
1157            // Check string length limit
1158            if let Some(max_len) = config.max_string_length {
1159                if s.len() > max_len {
1160                    return Err(JsonConversionError::MaxStringLengthExceeded(
1161                        max_len,
1162                        s.len(),
1163                    ));
1164                }
1165            }
1166
1167            // Check for expression pattern $( ... )
1168            if s.starts_with("$(") && s.ends_with(')') {
1169                let expr = parse_expression_token(&s)
1170                    .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
1171                Ok(Item::Scalar(Value::Expression(Box::new(expr))))
1172            } else {
1173                // ZERO-COPY OPTIMIZATION: Move the string instead of cloning
1174                Ok(Item::Scalar(Value::String(s.into_boxed_str())))
1175            }
1176        }
1177        JsonValue::Array(arr) => {
1178            // Check array size limit
1179            if let Some(max_size) = config.max_array_size {
1180                if arr.len() > max_size {
1181                    return Err(JsonConversionError::MaxArraySizeExceeded(
1182                        max_size,
1183                        arr.len(),
1184                    ));
1185                }
1186            }
1187
1188            // Handle empty arrays as empty matrix lists
1189            if arr.is_empty() {
1190                let type_name = singularize_and_capitalize(key);
1191                let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
1192                let mut list = MatrixList::new(type_name.clone(), schema.clone());
1193                list.count_hint = Some(0);
1194                structs.insert(type_name, schema);
1195                Ok(Item::List(list))
1196            } else if is_tensor_array(&arr) {
1197                // Check if it's a tensor (array of numbers)
1198                let tensor = json_array_to_tensor_owned(arr, config, depth + 1)?;
1199                Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
1200            } else if is_object_array(&arr) {
1201                // Convert to matrix list
1202                let list =
1203                    json_array_to_matrix_list(&arr, key, config, structs, schema_cache, depth + 1)?;
1204                Ok(Item::List(list))
1205            } else {
1206                // Mixed array - try to convert to tensor
1207                let tensor = json_array_to_tensor_owned(arr, config, depth + 1)?;
1208                Ok(Item::Scalar(Value::Tensor(Box::new(tensor))))
1209            }
1210        }
1211        JsonValue::Object(obj) => {
1212            // Check for special keys
1213            if let Some(JsonValue::String(r)) = obj.get("@ref") {
1214                return Ok(Item::Scalar(Value::Reference(
1215                    parse_reference(r).map_err(JsonConversionError::InvalidReference)?,
1216                )));
1217            }
1218            // Regular object - convert owned map
1219            let item_map = json_object_to_item_map(&obj, config, structs, schema_cache, depth + 1)?;
1220            Ok(Item::Object(item_map))
1221        }
1222    }
1223}
1224
/// Array type classification for single-pass dispatch.
///
/// `classify_array` determines the category in one scan with early exit
/// as soon as an array is known to be mixed, replacing separate
/// `is_tensor_array` / `is_object_array` scans.
///
/// NOTE(review): call sites that test both boolean predicates in
/// sequence still scan the array twice; matching on `classify_array`
/// directly would realize the single-pass benefit — confirm before
/// relying on the original 8-12% overhead-reduction claim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArrayType {
    /// Empty array (handled separately as empty matrix list)
    Empty,
    /// Homogeneous array of numbers and/or nested arrays (tensor)
    Tensor,
    /// Homogeneous array of objects (matrix list)
    Objects,
    /// Heterogeneous array (fallback to tensor conversion)
    Mixed,
}
1244
1245/// Classify array type in single pass
1246///
1247/// PERFORMANCE: O(n) worst case, but often O(1) with early exit
1248fn classify_array(arr: &[JsonValue]) -> ArrayType {
1249    if arr.is_empty() {
1250        return ArrayType::Empty;
1251    }
1252
1253    // Determine expected type from first element
1254    let first_type = match &arr[0] {
1255        JsonValue::Number(_) | JsonValue::Array(_) => ArrayType::Tensor,
1256        JsonValue::Object(_) => ArrayType::Objects,
1257        _ => return ArrayType::Mixed,
1258    };
1259
1260    // Verify remaining elements match (early exit on mismatch)
1261    for elem in &arr[1..] {
1262        let matches = match (first_type, elem) {
1263            (ArrayType::Tensor, JsonValue::Number(_)) => true,
1264            (ArrayType::Tensor, JsonValue::Array(_)) => true,
1265            (ArrayType::Objects, JsonValue::Object(_)) => true,
1266            _ => return ArrayType::Mixed,
1267        };
1268        if !matches {
1269            return ArrayType::Mixed;
1270        }
1271    }
1272
1273    first_type
1274}
1275
1276// Keep legacy functions for compatibility (now zero-cost wrappers)
1277fn is_tensor_array(arr: &[JsonValue]) -> bool {
1278    matches!(classify_array(arr), ArrayType::Tensor)
1279}
1280
1281fn is_object_array(arr: &[JsonValue]) -> bool {
1282    matches!(classify_array(arr), ArrayType::Objects)
1283}
1284
1285fn json_array_to_tensor(
1286    arr: &[JsonValue],
1287    config: &FromJsonConfig,
1288    depth: usize,
1289) -> Result<Tensor, JsonConversionError> {
1290    // Check recursion depth
1291    if let Some(max_depth) = config.max_depth {
1292        if depth >= max_depth {
1293            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1294        }
1295    }
1296
1297    // OPTIMIZATION: Pre-allocate tensor items vector with exact capacity
1298    // Reduces reallocations during recursive tensor construction
1299    let mut items = Vec::with_capacity(arr.len());
1300
1301    for v in arr {
1302        let tensor = match v {
1303            JsonValue::Number(n) => {
1304                // Tensors use f64, check for overflow but allow conversion
1305                if is_integer_overflow(n) {
1306                    // For tensors, overflow to float is acceptable but worth noting
1307                    // in future versions, could add a warning mechanism
1308                }
1309                n.as_f64()
1310                    .map(Tensor::Scalar)
1311                    .ok_or_else(|| JsonConversionError::InvalidNumber(n.to_string()))?
1312            }
1313            JsonValue::Array(nested) => json_array_to_tensor(nested, config, depth + 1)?,
1314            _ => return Err(JsonConversionError::InvalidTensor),
1315        };
1316        items.push(tensor);
1317    }
1318
1319    Ok(Tensor::Array(items))
1320}
1321
1322/// Convert owned JSON array to Tensor with zero-copy optimization
1323fn json_array_to_tensor_owned(
1324    arr: Vec<JsonValue>,
1325    config: &FromJsonConfig,
1326    depth: usize,
1327) -> Result<Tensor, JsonConversionError> {
1328    // Check recursion depth
1329    if let Some(max_depth) = config.max_depth {
1330        if depth >= max_depth {
1331            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1332        }
1333    }
1334
1335    // OPTIMIZATION: Pre-allocate with exact capacity and consume owned values
1336    // This combines zero-copy string handling with pre-allocation
1337    let mut items = Vec::with_capacity(arr.len());
1338
1339    for v in arr {
1340        let tensor = match v {
1341            JsonValue::Number(n) => {
1342                // Tensors use f64, check for overflow but allow conversion
1343                if is_integer_overflow(&n) {
1344                    // For tensors, overflow to float is acceptable but worth noting
1345                    // in future versions, could add a warning mechanism
1346                }
1347                n.as_f64()
1348                    .map(Tensor::Scalar)
1349                    .ok_or_else(|| JsonConversionError::InvalidNumber(n.to_string()))?
1350            }
1351            JsonValue::Array(nested) => json_array_to_tensor_owned(nested, config, depth + 1)?,
1352            _ => return Err(JsonConversionError::InvalidTensor),
1353        };
1354        items.push(tensor);
1355    }
1356
1357    Ok(Tensor::Array(items))
1358}
1359
1360#[allow(clippy::only_used_in_recursion)]
1361fn json_array_to_matrix_list(
1362    arr: &[JsonValue],
1363    key: &str,
1364    config: &FromJsonConfig,
1365    structs: &mut BTreeMap<String, Vec<String>>,
1366    schema_cache: &mut SchemaCache,
1367    depth: usize,
1368) -> Result<MatrixList, JsonConversionError> {
1369    // Check recursion depth
1370    if let Some(max_depth) = config.max_depth {
1371        if depth >= max_depth {
1372            return Err(JsonConversionError::MaxDepthExceeded(max_depth));
1373        }
1374    }
1375    // Infer type name from key (singularize and capitalize)
1376    let type_name = singularize_and_capitalize(key);
1377
1378    // Infer schema from first object, excluding nested array fields (children)
1379    let schema: Vec<String> = if let Some(JsonValue::Object(first)) = arr.first() {
1380        // Check for explicit __hedl_schema metadata (preserves column order)
1381        let inferred = if let Some(JsonValue::Array(schema_arr)) = first.get("__hedl_schema") {
1382            schema_arr
1383                .iter()
1384                .filter_map(|v| v.as_str().map(String::from))
1385                .collect()
1386        } else {
1387            // OPTIMIZATION: Use SmallVec for cache key to avoid heap allocation
1388            // for objects with <16 keys (common case). Pre-allocate with capacity
1389            // hint to reduce reallocations.
1390            let mut cache_key: SmallVec<[String; 16]> = SmallVec::with_capacity(first.len());
1391
1392            for k in first.keys() {
1393                if k.starts_with("__") {
1394                    continue;
1395                }
1396                // Exclude arrays of objects - they become children
1397                if let Some(JsonValue::Array(arr)) = first.get(k) {
1398                    if is_object_array(arr) {
1399                        continue;
1400                    }
1401                }
1402                cache_key.push(k.clone());
1403            }
1404            cache_key.sort();
1405
1406            // Convert to Vec for cache lookup (SmallVec doesn't implement Hash for all sizes)
1407            let cache_key_vec: Vec<String> = cache_key.iter().cloned().collect();
1408
1409            // Check cache first to avoid redundant schema inference
1410            if let Some(cached_schema) = schema_cache.get(&cache_key_vec) {
1411                cached_schema.clone()
1412            } else {
1413                // Fall back to inferring from keys (sorted alphabetically with id first)
1414                let mut keys = cache_key_vec.clone();
1415
1416                // Ensure "id" is first if present
1417                if let Some(pos) = keys.iter().position(|k| k == "id") {
1418                    keys.remove(pos);
1419                    keys.insert(0, "id".to_string());
1420                }
1421
1422                // Cache the inferred schema for future use
1423                schema_cache.insert(cache_key_vec, keys.clone());
1424                keys
1425            }
1426        };
1427        // Ensure schema is not empty (could happen with empty __hedl_schema or all __ keys)
1428        if inferred.is_empty() {
1429            DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
1430        } else {
1431            inferred
1432        }
1433    } else {
1434        DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
1435    };
1436
1437    // Register the struct definition
1438    structs.insert(type_name.clone(), schema.clone());
1439
1440    // OPTIMIZATION: Pre-allocate rows vector with exact capacity
1441    // This eliminates reallocation during growth and reduces memory churn by ~20%
1442    let mut rows = Vec::with_capacity(arr.len());
1443
1444    for item in arr {
1445        if let JsonValue::Object(obj) = item {
1446            // Get ID from first column
1447            let id = obj
1448                .get(&schema[0])
1449                .and_then(|v| v.as_str())
1450                .unwrap_or("")
1451                .to_string();
1452
1453            // OPTIMIZATION: Use SmallVec for fields to avoid heap allocation for small schemas
1454            // Most schemas have <16 fields, so this eliminates heap allocation in common case
1455            let mut fields: SmallVec<[Value; 16]> = SmallVec::with_capacity(schema.len());
1456            for col in &schema {
1457                let value = obj
1458                    .get(col)
1459                    .map(|v| json_to_value(v, config))
1460                    .transpose()?
1461                    .unwrap_or(Value::Null);
1462                fields.push(value);
1463            }
1464
1465            // Convert SmallVec to Vec for Node (zero-copy if on heap, single allocation if on stack)
1466            let fields_vec: Vec<Value> = fields.into_vec();
1467
1468            // OPTIMIZATION: Handle nested children with minimal allocation overhead
1469            // For objects with few children (<8), direct insertion is faster
1470            let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
1471
1472            // Quick pre-count to decide strategy
1473            let child_count = obj
1474                .iter()
1475                .filter(|(_, v)| matches!(v, JsonValue::Array(arr) if is_object_array(arr)))
1476                .count();
1477
1478            if child_count < 8 {
1479                // Small number of children: direct insertion
1480                for (child_key, child_value) in obj {
1481                    if let JsonValue::Array(child_arr) = child_value {
1482                        if is_object_array(child_arr) {
1483                            let child_list = json_array_to_matrix_list(
1484                                child_arr,
1485                                child_key,
1486                                config,
1487                                structs,
1488                                schema_cache,
1489                                depth + 1,
1490                            )?;
1491                            children.insert(child_key.clone(), child_list.rows);
1492                        }
1493                    }
1494                }
1495            } else {
1496                // Many children: sorted batch insertion
1497                let mut child_items: Vec<(String, Vec<Node>)> = Vec::with_capacity(child_count);
1498                for (child_key, child_value) in obj {
1499                    if let JsonValue::Array(child_arr) = child_value {
1500                        if is_object_array(child_arr) {
1501                            let child_list = json_array_to_matrix_list(
1502                                child_arr,
1503                                child_key,
1504                                config,
1505                                structs,
1506                                schema_cache,
1507                                depth + 1,
1508                            )?;
1509                            child_items.push((child_key.clone(), child_list.rows));
1510                        }
1511                    }
1512                }
1513                child_items.sort_by(|a, b| a.0.cmp(&b.0));
1514                for (key, nodes) in child_items {
1515                    children.insert(key, nodes);
1516                }
1517            }
1518
1519            let node = Node {
1520                type_name: type_name.clone(),
1521                id,
1522                fields: fields_vec.into(),
1523                children: if children.is_empty() {
1524                    None
1525                } else {
1526                    Some(Box::new(children))
1527                },
1528                child_count: 0,
1529            };
1530
1531            rows.push(node);
1532        }
1533    }
1534
1535    // Infer count_hint from array length
1536    let count_hint = Some(arr.len());
1537
1538    Ok(MatrixList {
1539        type_name,
1540        schema,
1541        rows,
1542        count_hint,
1543    })
1544}
1545
1546fn json_to_value(value: &JsonValue, config: &FromJsonConfig) -> Result<Value, JsonConversionError> {
1547    Ok(match value {
1548        JsonValue::Null => Value::Null,
1549        JsonValue::Bool(b) => Value::Bool(*b),
1550        JsonValue::Number(n) => json_number_to_value(n)?,
1551        JsonValue::String(s) => {
1552            // Check string length limit
1553            if let Some(max_len) = config.max_string_length {
1554                if s.len() > max_len {
1555                    return Err(JsonConversionError::MaxStringLengthExceeded(
1556                        max_len,
1557                        s.len(),
1558                    ));
1559                }
1560            }
1561
1562            // Check for expression pattern $( ... )
1563            if s.starts_with("$(") && s.ends_with(')') {
1564                let expr = parse_expression_token(s)
1565                    .map_err(|e| JsonConversionError::InvalidExpression(e.to_string()))?;
1566                Value::Expression(Box::new(expr))
1567            } else {
1568                Value::String(s.clone().into_boxed_str())
1569            }
1570        }
1571        JsonValue::Array(arr) => {
1572            // Check array size limit
1573            if let Some(max_size) = config.max_array_size {
1574                if arr.len() > max_size {
1575                    return Err(JsonConversionError::MaxArraySizeExceeded(
1576                        max_size,
1577                        arr.len(),
1578                    ));
1579                }
1580            }
1581
1582            // Check if this is an array of objects (nested children) - skip as Null
1583            // Child arrays are handled separately in json_array_to_matrix_list
1584            if is_object_array(arr) {
1585                Value::Null // Children processed by json_array_to_matrix_list
1586            } else if is_tensor_array(arr) {
1587                let tensor = json_array_to_tensor(arr, config, 0)?;
1588                Value::Tensor(Box::new(tensor))
1589            } else if arr.is_empty() {
1590                // Empty array → empty tensor
1591                Value::Tensor(Box::new(Tensor::Array(vec![])))
1592            } else {
1593                // Mixed array - try as tensor
1594                let tensor = json_array_to_tensor(arr, config, 0)?;
1595                Value::Tensor(Box::new(tensor))
1596            }
1597        }
1598        JsonValue::Object(obj) => {
1599            if let Some(JsonValue::String(r)) = obj.get("@ref") {
1600                Value::Reference(parse_reference(r).map_err(JsonConversionError::InvalidReference)?)
1601            } else {
1602                return Err(JsonConversionError::NestedObject);
1603            }
1604        }
1605    })
1606}
1607
1608#[cfg(test)]
1609mod tests {
1610    use super::*;
1611    use serde_json::json;
1612
1613    // ==================== FromJsonConfig tests ====================
1614
    /// Default config exposes the documented type name, version, and
    /// security-limit constants (also pinned to their literal values).
    #[test]
    fn test_from_json_config_default() {
        let config = FromJsonConfig::default();
        assert_eq!(config.default_type_name, "Item");
        assert_eq!(config.version, (1, 0));
        assert_eq!(config.max_depth, Some(DEFAULT_MAX_DEPTH));
        assert_eq!(config.max_array_size, Some(DEFAULT_MAX_ARRAY_SIZE));
        assert_eq!(config.max_string_length, Some(DEFAULT_MAX_STRING_LENGTH));
        assert_eq!(config.max_object_size, Some(DEFAULT_MAX_OBJECT_SIZE));
        // Verify actual values
        assert_eq!(config.max_depth, Some(10_000));
        assert_eq!(config.max_array_size, Some(10_000_000));
        assert_eq!(config.max_string_length, Some(100 * 1024 * 1024));
        assert_eq!(config.max_object_size, Some(100_000));
    }
1630
    /// Debug output names the struct and its key fields.
    #[test]
    fn test_from_json_config_debug() {
        let config = FromJsonConfig::default();
        let debug = format!("{config:?}");
        assert!(debug.contains("FromJsonConfig"));
        assert!(debug.contains("default_type_name"));
        assert!(debug.contains("version"));
    }
1639
    /// Cloning preserves custom field values.
    #[test]
    fn test_from_json_config_clone() {
        let config = FromJsonConfig {
            default_type_name: "Custom".to_string(),
            version: (2, 1),
            max_depth: Some(50),
            max_array_size: Some(10_000),
            max_string_length: Some(1_000_000),
            max_object_size: Some(1_000),
            surrogate_policy: SurrogatePolicy::Reject,
            #[cfg(feature = "lenient")]
            lenient: false,
        };
        let cloned = config.clone();
        assert_eq!(cloned.default_type_name, "Custom");
        assert_eq!(cloned.version, (2, 1));
        assert_eq!(cloned.max_depth, Some(50));
    }
1658
1659    // ==================== FromJsonConfigBuilder tests ====================
1660
1661    #[test]
1662    fn test_builder_default() {
1663        let config = FromJsonConfig::builder().build();
1664        assert_eq!(config.default_type_name, "Item");
1665        assert_eq!(config.version, (1, 0));
1666        assert_eq!(config.max_depth, Some(DEFAULT_MAX_DEPTH));
1667        assert_eq!(config.max_array_size, Some(DEFAULT_MAX_ARRAY_SIZE));
1668        assert_eq!(config.max_string_length, Some(DEFAULT_MAX_STRING_LENGTH));
1669        assert_eq!(config.max_object_size, Some(DEFAULT_MAX_OBJECT_SIZE));
1670    }
1671
1672    #[test]
1673    fn test_builder_custom_limits() {
1674        let config = FromJsonConfig::builder()
1675            .max_depth(1_000)
1676            .max_array_size(100_000)
1677            .max_string_length(10 * 1024 * 1024)
1678            .max_object_size(5_000)
1679            .build();
1680
1681        assert_eq!(config.max_depth, Some(1_000));
1682        assert_eq!(config.max_array_size, Some(100_000));
1683        assert_eq!(config.max_string_length, Some(10 * 1024 * 1024));
1684        assert_eq!(config.max_object_size, Some(5_000));
1685    }
1686
1687    #[test]
1688    fn test_builder_unlimited() {
1689        let config = FromJsonConfig::builder().unlimited().build();
1690
1691        assert_eq!(config.max_depth, None);
1692        assert_eq!(config.max_array_size, None);
1693        assert_eq!(config.max_string_length, None);
1694        assert_eq!(config.max_object_size, None);
1695    }
1696
1697    #[test]
1698    fn test_builder_custom_type_and_version() {
1699        let config = FromJsonConfig::builder()
1700            .default_type_name("CustomType")
1701            .version(2, 1)
1702            .build();
1703
1704        assert_eq!(config.default_type_name, "CustomType");
1705        assert_eq!(config.version, (2, 1));
1706    }
1707
1708    #[test]
1709    fn test_builder_chaining() {
1710        let config = FromJsonConfig::builder()
1711            .default_type_name("Entity")
1712            .version(1, 5)
1713            .max_depth(500)
1714            .max_array_size(50_000)
1715            .max_string_length(5 * 1024 * 1024)
1716            .max_object_size(2_500)
1717            .build();
1718
1719        assert_eq!(config.default_type_name, "Entity");
1720        assert_eq!(config.version, (1, 5));
1721        assert_eq!(config.max_depth, Some(500));
1722        assert_eq!(config.max_array_size, Some(50_000));
1723        assert_eq!(config.max_string_length, Some(5 * 1024 * 1024));
1724        assert_eq!(config.max_object_size, Some(2_500));
1725    }
1726
1727    // ==================== parse_reference tests ====================
1728
1729    #[test]
1730    fn test_parse_reference_qualified() {
1731        let r = parse_reference("@User:123").unwrap();
1732        assert_eq!(r.type_name, Some("User".to_string().into()));
1733        assert_eq!(r.id, "123".into());
1734    }
1735
1736    #[test]
1737    fn test_parse_reference_local() {
1738        let r = parse_reference("@123").unwrap();
1739        assert_eq!(r.type_name, None);
1740        assert_eq!(r.id, "123".into());
1741    }
1742
1743    #[test]
1744    fn test_parse_reference_invalid() {
1745        let result = parse_reference("User:123");
1746        assert!(result.is_err());
1747    }
1748
1749    // ==================== is_tensor_array tests ====================
1750
1751    #[test]
1752    fn test_is_tensor_array_numbers() {
1753        let arr = vec![json!(1), json!(2), json!(3)];
1754        assert!(is_tensor_array(&arr));
1755    }
1756
1757    #[test]
1758    fn test_is_tensor_array_nested() {
1759        let arr = vec![json!([1, 2]), json!([3, 4])];
1760        assert!(is_tensor_array(&arr));
1761    }
1762
1763    #[test]
1764    fn test_is_tensor_array_empty() {
1765        let arr: Vec<JsonValue> = vec![];
1766        assert!(!is_tensor_array(&arr));
1767    }
1768
1769    #[test]
1770    fn test_is_tensor_array_with_strings() {
1771        let arr = vec![json!(1), json!("not a tensor")];
1772        assert!(!is_tensor_array(&arr));
1773    }
1774
1775    #[test]
1776    fn test_is_tensor_array_with_objects() {
1777        let arr = vec![json!({"id": 1})];
1778        assert!(!is_tensor_array(&arr));
1779    }
1780
1781    // ==================== is_object_array tests ====================
1782
1783    #[test]
1784    fn test_is_object_array_true() {
1785        let arr = vec![json!({"id": 1}), json!({"id": 2})];
1786        assert!(is_object_array(&arr));
1787    }
1788
1789    #[test]
1790    fn test_is_object_array_empty() {
1791        let arr: Vec<JsonValue> = vec![];
1792        assert!(!is_object_array(&arr));
1793    }
1794
1795    #[test]
1796    fn test_is_object_array_mixed() {
1797        let arr = vec![json!({"id": 1}), json!(123)];
1798        assert!(!is_object_array(&arr));
1799    }
1800
1801    // ==================== json_array_to_tensor tests ====================
1802
1803    #[test]
1804    fn test_json_array_to_tensor_1d() {
1805        let arr = vec![json!(1.0), json!(2.0), json!(3.0)];
1806        let config = FromJsonConfig::default();
1807        let tensor = json_array_to_tensor(&arr, &config, 0).unwrap();
1808        assert_eq!(tensor.flatten(), vec![1.0, 2.0, 3.0]);
1809    }
1810
1811    #[test]
1812    fn test_json_array_to_tensor_2d() {
1813        let arr = vec![json!([1.0, 2.0]), json!([3.0, 4.0])];
1814        let config = FromJsonConfig::default();
1815        let tensor = json_array_to_tensor(&arr, &config, 0).unwrap();
1816        assert_eq!(tensor.flatten(), vec![1.0, 2.0, 3.0, 4.0]);
1817    }
1818
1819    #[test]
1820    fn test_json_array_to_tensor_invalid_element() {
1821        let arr = vec![json!(1.0), json!("not a number")];
1822        let config = FromJsonConfig::default();
1823        let result = json_array_to_tensor(&arr, &config, 0);
1824        assert!(result.is_err());
1825    }
1826
1827    // ==================== json_to_value tests ====================
1828
1829    #[test]
1830    fn test_json_to_value_null() {
1831        let config = FromJsonConfig::default();
1832        let result = json_to_value(&JsonValue::Null, &config).unwrap();
1833        assert!(matches!(result, Value::Null));
1834    }
1835
1836    #[test]
1837    fn test_json_to_value_bool() {
1838        let config = FromJsonConfig::default();
1839        let result = json_to_value(&json!(true), &config).unwrap();
1840        assert!(matches!(result, Value::Bool(true)));
1841
1842        let result = json_to_value(&json!(false), &config).unwrap();
1843        assert!(matches!(result, Value::Bool(false)));
1844    }
1845
1846    #[test]
1847    fn test_json_to_value_int() {
1848        let config = FromJsonConfig::default();
1849        let result = json_to_value(&json!(42), &config).unwrap();
1850        assert!(matches!(result, Value::Int(42)));
1851    }
1852
1853    #[test]
1854    fn test_json_to_value_float() {
1855        let config = FromJsonConfig::default();
1856        let result = json_to_value(&json!(3.5), &config).unwrap();
1857        if let Value::Float(f) = result {
1858            assert!((f - 3.5).abs() < 0.001);
1859        } else {
1860            panic!("Expected Float");
1861        }
1862    }
1863
1864    #[test]
1865    fn test_json_to_value_string() {
1866        let config = FromJsonConfig::default();
1867        let result = json_to_value(&json!("hello"), &config).unwrap();
1868        assert!(matches!(result, Value::String(ref s) if s.as_ref() == "hello"));
1869    }
1870
1871    #[test]
1872    fn test_json_to_value_expression() {
1873        let config = FromJsonConfig::default();
1874        let result = json_to_value(&json!("$(foo)"), &config).unwrap();
1875        assert!(matches!(result, Value::Expression(_)));
1876    }
1877
1878    #[test]
1879    fn test_json_to_value_tensor() {
1880        let config = FromJsonConfig::default();
1881        let result = json_to_value(&json!([1.0, 2.0, 3.0]), &config).unwrap();
1882        if let Value::Tensor(t) = result {
1883            assert_eq!(t.flatten(), vec![1.0, 2.0, 3.0]);
1884        } else {
1885            panic!("Expected Tensor");
1886        }
1887    }
1888
1889    #[test]
1890    fn test_json_to_value_reference() {
1891        let config = FromJsonConfig::default();
1892        let result = json_to_value(&json!({"@ref": "@User:123"}), &config).unwrap();
1893        if let Value::Reference(r) = result {
1894            assert_eq!(r.type_name, Some("User".to_string().into()));
1895            assert_eq!(r.id, "123".into());
1896        } else {
1897            panic!("Expected Reference");
1898        }
1899    }
1900
1901    #[test]
1902    fn test_json_to_value_nested_object_error() {
1903        let config = FromJsonConfig::default();
1904        let result = json_to_value(&json!({"key": "value"}), &config);
1905        assert!(result.is_err());
1906    }
1907
1908    // ==================== from_json tests ====================
1909
1910    #[test]
1911    fn test_from_json_empty_object() {
1912        let json = "{}";
1913        let config = FromJsonConfig::default();
1914        let doc = from_json(json, &config).unwrap();
1915        assert!(doc.root.is_empty());
1916        assert_eq!(doc.version, (1, 0));
1917    }
1918
1919    #[test]
1920    fn test_from_json_simple_scalars() {
1921        let json = r#"{"name": "test", "count": 42, "active": true}"#;
1922        let config = FromJsonConfig::default();
1923        let doc = from_json(json, &config).unwrap();
1924        assert!(doc.root.contains_key("name"));
1925        assert!(doc.root.contains_key("count"));
1926        assert!(doc.root.contains_key("active"));
1927    }
1928
1929    #[test]
1930    fn test_from_json_nested_object() {
1931        let json = r#"{"outer": {"inner": "value"}}"#;
1932        let config = FromJsonConfig::default();
1933        let doc = from_json(json, &config).unwrap();
1934        if let Item::Object(obj) = &doc.root["outer"] {
1935            assert!(obj.contains_key("inner"));
1936        } else {
1937            panic!("Expected Object");
1938        }
1939    }
1940
1941    #[test]
1942    fn test_from_json_array_of_objects() {
1943        let json = r#"{"users": [{"id": "1", "name": "Alice"}]}"#;
1944        let config = FromJsonConfig::default();
1945        let doc = from_json(json, &config).unwrap();
1946        if let Item::List(list) = &doc.root["users"] {
1947            assert_eq!(list.type_name, "User");
1948            assert_eq!(list.rows.len(), 1);
1949        } else {
1950            panic!("Expected List");
1951        }
1952    }
1953
1954    #[test]
1955    fn test_from_json_tensor() {
1956        let json = r#"{"data": [1, 2, 3]}"#;
1957        let config = FromJsonConfig::default();
1958        let doc = from_json(json, &config).unwrap();
1959        if let Item::Scalar(Value::Tensor(t)) = &doc.root["data"] {
1960            assert_eq!(t.flatten(), vec![1.0, 2.0, 3.0]);
1961        } else {
1962            panic!("Expected Tensor");
1963        }
1964    }
1965
1966    #[test]
1967    fn test_from_json_invalid_json() {
1968        let json = "not valid json";
1969        let config = FromJsonConfig::default();
1970        let result = from_json(json, &config);
1971        assert!(result.is_err());
1972    }
1973
1974    #[test]
1975    fn test_from_json_non_object_root() {
1976        let json = "[1, 2, 3]";
1977        let config = FromJsonConfig::default();
1978        let result = from_json(json, &config);
1979        assert!(result.is_err());
1980    }
1981
1982    // ==================== from_json_value tests ====================
1983
1984    #[test]
1985    fn test_from_json_value_simple() {
1986        let value = json!({"key": 42});
1987        let config = FromJsonConfig::default();
1988        let doc = from_json_value(&value, &config).unwrap();
1989        if let Item::Scalar(Value::Int(n)) = &doc.root["key"] {
1990            assert_eq!(*n, 42);
1991        } else {
1992            panic!("Expected Int");
1993        }
1994    }
1995
1996    // ==================== json_value_to_item tests ====================
1997
1998    #[test]
1999    fn test_json_value_to_item_null() {
2000        let config = FromJsonConfig::default();
2001        let mut structs = BTreeMap::new();
2002        let mut schema_cache = SchemaCache::new();
2003        let result = json_value_to_item(
2004            &JsonValue::Null,
2005            "test",
2006            &config,
2007            &mut structs,
2008            &mut schema_cache,
2009            0,
2010        )
2011        .unwrap();
2012        assert!(matches!(result, Item::Scalar(Value::Null)));
2013    }
2014
2015    #[test]
2016    fn test_json_value_to_item_bool() {
2017        let config = FromJsonConfig::default();
2018        let mut structs = BTreeMap::new();
2019        let mut schema_cache = SchemaCache::new();
2020        let result = json_value_to_item(
2021            &json!(true),
2022            "test",
2023            &config,
2024            &mut structs,
2025            &mut schema_cache,
2026            0,
2027        )
2028        .unwrap();
2029        assert!(matches!(result, Item::Scalar(Value::Bool(true))));
2030    }
2031
2032    #[test]
2033    fn test_json_value_to_item_empty_array() {
2034        let config = FromJsonConfig::default();
2035        let mut structs = BTreeMap::new();
2036        let mut schema_cache = SchemaCache::new();
2037        let result = json_value_to_item(
2038            &json!([]),
2039            "items",
2040            &config,
2041            &mut structs,
2042            &mut schema_cache,
2043            0,
2044        )
2045        .unwrap();
2046        if let Item::List(list) = result {
2047            assert!(list.rows.is_empty());
2048            assert_eq!(list.type_name, "Item");
2049        } else {
2050            panic!("Expected List");
2051        }
2052    }
2053
2054    // ==================== Schema inference tests ====================
2055
2056    #[test]
2057    fn test_schema_inference_id_first() {
2058        let json = r#"{"users": [{"name": "Alice", "id": "1", "age": 30}]}"#;
2059        let config = FromJsonConfig::default();
2060        let doc = from_json(json, &config).unwrap();
2061        if let Item::List(list) = &doc.root["users"] {
2062            assert_eq!(list.schema[0], "id"); // id should be first
2063        } else {
2064            panic!("Expected List");
2065        }
2066    }
2067
2068    #[test]
2069    fn test_struct_registration() {
2070        let json = r#"{"users": [{"id": "1"}]}"#;
2071        let config = FromJsonConfig::default();
2072        let doc = from_json(json, &config).unwrap();
2073        assert!(doc.structs.contains_key("User"));
2074    }
2075
2076    // ==================== Security limit tests ====================
2077
2078    #[test]
2079    fn test_max_depth_exceeded() {
2080        // Test with custom low limit for faster testing
2081        // Default is now 10,000 which is too deep to test efficiently
2082        let json = r#"{"a":1}"#;
2083
2084        let config = FromJsonConfig {
2085            default_type_name: "Item".to_string(),
2086            version: (1, 0),
2087            max_depth: Some(0), // Fail on any value
2088            max_array_size: Some(100_000),
2089            max_string_length: Some(10_000_000),
2090            max_object_size: Some(10_000),
2091            surrogate_policy: SurrogatePolicy::Reject,
2092            #[cfg(feature = "lenient")]
2093            lenient: false,
2094        };
2095
2096        let result = from_json(json, &config);
2097        assert!(result.is_err(), "Expected error for depth 0");
2098        let err_msg = result.unwrap_err().to_string();
2099        assert!(err_msg.contains("Maximum recursion depth"));
2100    }
2101
2102    #[test]
2103    fn test_max_array_size_exceeded() {
2104        let config = FromJsonConfig {
2105            default_type_name: "Item".to_string(),
2106            version: (1, 0),
2107            max_depth: Some(100),
2108            max_array_size: Some(10), // Small limit for testing
2109            max_string_length: Some(10_000_000),
2110            max_object_size: Some(10_000),
2111            surrogate_policy: SurrogatePolicy::Reject,
2112            #[cfg(feature = "lenient")]
2113            lenient: false,
2114        };
2115
2116        // Create array with 11 elements
2117        let json = r#"{"items": [1,2,3,4,5,6,7,8,9,10,11]}"#;
2118        let result = from_json(json, &config);
2119        assert!(result.is_err());
2120        let err_msg = result.unwrap_err().to_string();
2121        assert!(err_msg.contains("Maximum array size"));
2122    }
2123
2124    #[test]
2125    fn test_max_string_length_exceeded() {
2126        let config = FromJsonConfig {
2127            default_type_name: "Item".to_string(),
2128            version: (1, 0),
2129            max_depth: Some(100),
2130            max_array_size: Some(100_000),
2131            max_string_length: Some(100), // Small limit for testing
2132            max_object_size: Some(10_000),
2133            surrogate_policy: SurrogatePolicy::Reject,
2134            #[cfg(feature = "lenient")]
2135            lenient: false,
2136        };
2137
2138        // Create string with 101 characters
2139        let long_string = "a".repeat(101);
2140        let json = format!(r#"{{"text": "{long_string}"}}"#);
2141        let result = from_json(&json, &config);
2142        assert!(result.is_err());
2143        let err_msg = result.unwrap_err().to_string();
2144        assert!(err_msg.contains("Maximum string length"));
2145    }
2146
2147    #[test]
2148    fn test_max_object_size_exceeded() {
2149        let config = FromJsonConfig {
2150            default_type_name: "Item".to_string(),
2151            version: (1, 0),
2152            max_depth: Some(100),
2153            max_array_size: Some(100_000),
2154            max_string_length: Some(10_000_000),
2155            max_object_size: Some(5), // Small limit for testing
2156            surrogate_policy: SurrogatePolicy::Reject,
2157            #[cfg(feature = "lenient")]
2158            lenient: false,
2159        };
2160
2161        // Create object with 6 keys
2162        let json = r#"{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6}"#;
2163        let result = from_json(json, &config);
2164        assert!(result.is_err());
2165        let err_msg = result.unwrap_err().to_string();
2166        assert!(err_msg.contains("Maximum object size"));
2167    }
2168
2169    #[test]
2170    fn test_limits_disabled() {
2171        let config = FromJsonConfig {
2172            default_type_name: "Item".to_string(),
2173            version: (1, 0),
2174            max_depth: None, // Disabled
2175            max_array_size: None,
2176            max_string_length: None,
2177            max_object_size: None,
2178            surrogate_policy: SurrogatePolicy::Reject,
2179            #[cfg(feature = "lenient")]
2180            lenient: false,
2181        };
2182
2183        // These would fail with limits enabled
2184        let long_string = "a".repeat(1000);
2185        let json = format!(r#"{{"text": "{long_string}"}}"#);
2186        let result = from_json(&json, &config);
2187        assert!(result.is_ok());
2188    }
2189
2190    #[test]
2191    fn test_error_message_quality() {
2192        let config = FromJsonConfig::default();
2193
2194        // Test various error types
2195        let result1 = from_json("not json", &config);
2196        assert!(result1
2197            .unwrap_err()
2198            .to_string()
2199            .contains("JSON parse error"));
2200
2201        let result2 = from_json("[1,2,3]", &config);
2202        assert!(result2.unwrap_err().to_string().contains("Root must be"));
2203
2204        let result3 = from_json(r#"{"ref": {"@ref": "bad"}}"#, &config);
2205        assert!(result3.is_err()); // Invalid reference
2206    }
2207}
2208
2209// ============================================================================
2210// PARTIAL PARSING IMPLEMENTATION
2211// ============================================================================
2212
/// Error tolerance strategy for partial parsing
///
/// Determines how the parser should behave when encountering errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ErrorTolerance {
    /// Stop on the first error encountered
    #[default]
    StopOnFirst,

    /// Collect up to N errors before stopping
    ///
    /// The N-th error is still recorded before parsing halts.
    MaxErrors(usize),

    /// Collect all errors and continue parsing
    CollectAll,

    /// Skip invalid items in arrays/objects and continue
    ///
    /// Non-fatal errors are recorded and the offending item skipped;
    /// a fatal error still halts parsing.
    SkipInvalidItems,
}
2231
/// Location information for an error
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ErrorLocation {
    /// JSON path to the error (e.g., "$.users[2].email")
    pub path: String,

    /// Depth in the JSON structure
    pub depth: usize,
}

impl ErrorLocation {
    /// Location of the document root: path `$`, depth 0.
    fn root() -> Self {
        ErrorLocation {
            path: String::from("$"),
            depth: 0,
        }
    }

    /// Location one level down, inside the object member named `key`.
    fn child(&self, key: &str) -> Self {
        ErrorLocation {
            path: format!("{path}.{key}", path = self.path),
            depth: self.depth + 1,
        }
    }

    /// Location one level down, inside array element `idx`.
    fn index(&self, idx: usize) -> Self {
        ErrorLocation {
            path: format!("{path}[{idx}]", path = self.path),
            depth: self.depth + 1,
        }
    }
}
2264
2265/// Captured error during partial parsing
2266#[derive(Debug, Clone)]
2267pub struct ParseError {
2268    /// The error that occurred
2269    pub error: JsonConversionError,
2270
2271    /// Location where the error occurred
2272    pub location: ErrorLocation,
2273
2274    /// Whether this error is fatal (prevents document creation)
2275    pub is_fatal: bool,
2276}
2277
2278impl ParseError {
2279    fn new(error: JsonConversionError, location: ErrorLocation, is_fatal: bool) -> Self {
2280        Self {
2281            error,
2282            location,
2283            is_fatal,
2284        }
2285    }
2286}
2287
2288/// Configuration for partial parsing
2289#[derive(Debug, Clone, Default)]
2290pub struct PartialConfig {
2291    /// Base configuration for JSON conversion
2292    pub from_json_config: FromJsonConfig,
2293
2294    /// Error tolerance strategy
2295    pub tolerance: ErrorTolerance,
2296
2297    /// Whether to include partial results even on fatal errors
2298    pub include_partial_on_fatal: bool,
2299
2300    /// Replace invalid values with null instead of skipping
2301    pub replace_invalid_with_null: bool,
2302}
2303
2304impl PartialConfig {
2305    /// Create a new builder for partial parsing configuration
2306    #[must_use]
2307    pub fn builder() -> PartialConfigBuilder {
2308        PartialConfigBuilder::default()
2309    }
2310}
2311
2312/// Builder for `PartialConfig`
2313#[derive(Debug, Clone, Default)]
2314pub struct PartialConfigBuilder {
2315    from_json_config: FromJsonConfig,
2316    tolerance: ErrorTolerance,
2317    include_partial_on_fatal: bool,
2318    replace_invalid_with_null: bool,
2319}
2320
2321impl PartialConfigBuilder {
2322    /// Set the base `FromJsonConfig`
2323    #[must_use]
2324    pub fn from_json_config(mut self, config: FromJsonConfig) -> Self {
2325        self.from_json_config = config;
2326        self
2327    }
2328
2329    /// Set the error tolerance strategy
2330    #[must_use]
2331    pub fn tolerance(mut self, tolerance: ErrorTolerance) -> Self {
2332        self.tolerance = tolerance;
2333        self
2334    }
2335
2336    /// Set whether to include partial results on fatal errors
2337    #[must_use]
2338    pub fn include_partial_on_fatal(mut self, value: bool) -> Self {
2339        self.include_partial_on_fatal = value;
2340        self
2341    }
2342
2343    /// Set whether to replace invalid values with null
2344    #[must_use]
2345    pub fn replace_invalid_with_null(mut self, value: bool) -> Self {
2346        self.replace_invalid_with_null = value;
2347        self
2348    }
2349
2350    /// Build the `PartialConfig`
2351    #[must_use]
2352    pub fn build(self) -> PartialConfig {
2353        PartialConfig {
2354            from_json_config: self.from_json_config,
2355            tolerance: self.tolerance,
2356            include_partial_on_fatal: self.include_partial_on_fatal,
2357            replace_invalid_with_null: self.replace_invalid_with_null,
2358        }
2359    }
2360}
2361
2362/// Result of partial parsing
2363#[derive(Debug)]
2364pub struct PartialResult {
2365    /// Parsed document (if any)
2366    pub document: Option<Document>,
2367
2368    /// All errors encountered during parsing
2369    pub errors: Vec<ParseError>,
2370
2371    /// Whether parsing stopped early due to error limits
2372    pub stopped_early: bool,
2373}
2374
2375impl PartialResult {
2376    /// Check if parsing completed successfully without errors
2377    #[must_use]
2378    pub fn is_complete(&self) -> bool {
2379        self.errors.is_empty() && self.document.is_some()
2380    }
2381
2382    /// Check if parsing failed (fatal errors or no document)
2383    #[must_use]
2384    pub fn is_failed(&self) -> bool {
2385        self.errors.iter().any(|e| e.is_fatal) || self.document.is_none()
2386    }
2387
2388    /// Convert to Result type for simpler error handling
2389    pub fn into_result(self) -> Result<Document, Vec<ParseError>> {
2390        if self.errors.is_empty() {
2391            self.document.ok_or_else(Vec::new)
2392        } else {
2393            Err(self.errors)
2394        }
2395    }
2396}
2397
2398/// Error collection context for partial parsing
2399struct ErrorContext {
2400    errors: Vec<ParseError>,
2401    config: PartialConfig,
2402    stopped: bool,
2403}
2404
2405impl ErrorContext {
2406    fn new(config: PartialConfig) -> Self {
2407        Self {
2408            errors: Vec::new(),
2409            config,
2410            stopped: false,
2411        }
2412    }
2413
2414    /// Record an error and determine if parsing should continue
2415    fn record_error(
2416        &mut self,
2417        error: JsonConversionError,
2418        location: ErrorLocation,
2419        is_fatal: bool,
2420    ) -> bool {
2421        if self.stopped {
2422            return false;
2423        }
2424
2425        let parse_error = ParseError::new(error, location, is_fatal);
2426        self.errors.push(parse_error);
2427
2428        // Check if we should stop
2429        let should_stop = match self.config.tolerance {
2430            ErrorTolerance::StopOnFirst => true,
2431            ErrorTolerance::MaxErrors(max) => self.errors.len() >= max,
2432            ErrorTolerance::CollectAll => false,
2433            ErrorTolerance::SkipInvalidItems => is_fatal,
2434        };
2435
2436        if should_stop {
2437            self.stopped = true;
2438        }
2439
2440        !should_stop
2441    }
2442
2443    fn should_continue(&self) -> bool {
2444        !self.stopped
2445    }
2446}
2447
2448/// Parse JSON string with partial error recovery
2449///
2450/// This function attempts to parse as much of the JSON as possible,
2451/// collecting errors instead of failing on the first error.
2452///
2453/// # Examples
2454///
2455/// ```text
2456/// use hedl_json::from_json::{partial_parse_json, PartialConfig, ErrorTolerance};
2457///
2458/// let json = r#"{"valid": "data", "invalid": ...}"#;
2459/// let config = PartialConfig::builder()
2460///     .tolerance(ErrorTolerance::CollectAll)
2461///     .build();
2462///
2463/// let result = partial_parse_json(json, &config);
2464/// assert!(result.document.is_some());
2465/// assert!(!result.errors.is_empty());
2466/// ```
2467#[must_use]
2468pub fn partial_parse_json(json: &str, config: &PartialConfig) -> PartialResult {
2469    // Try to parse JSON first
2470    let value = match serde_json::from_str::<JsonValue>(json) {
2471        Ok(v) => v,
2472        Err(e) => {
2473            // Fatal JSON parsing error
2474            return PartialResult {
2475                document: None,
2476                errors: vec![ParseError::new(
2477                    JsonConversionError::ParseError(e.to_string()),
2478                    ErrorLocation::root(),
2479                    true,
2480                )],
2481                stopped_early: false,
2482            };
2483        }
2484    };
2485
2486    partial_parse_json_value(&value, config)
2487}
2488
2489/// Parse `serde_json::Value` with partial error recovery
2490#[must_use]
2491pub fn partial_parse_json_value(value: &JsonValue, config: &PartialConfig) -> PartialResult {
2492    let mut context = ErrorContext::new(config.clone());
2493    let mut structs = BTreeMap::new();
2494    let mut schema_cache = SchemaCache::new();
2495
2496    // Try to parse the root
2497    let root = if let JsonValue::Object(map) = value {
2498        match partial_json_object_to_root(
2499            map,
2500            &config.from_json_config,
2501            &mut structs,
2502            &mut schema_cache,
2503            0,
2504            &ErrorLocation::root(),
2505            &mut context,
2506        ) {
2507            Ok(root) => Some(root),
2508            Err(_) => {
2509                if config.include_partial_on_fatal {
2510                    Some(BTreeMap::new())
2511                } else {
2512                    None
2513                }
2514            }
2515        }
2516    } else {
2517        context.record_error(
2518            JsonConversionError::InvalidRoot(format!("{value:?}")),
2519            ErrorLocation::root(),
2520            true,
2521        );
2522        None
2523    };
2524
2525    let document = root.map(|root| Document {
2526        version: config.from_json_config.version,
2527        schema_versions: BTreeMap::new(),
2528        aliases: BTreeMap::new(),
2529        structs,
2530        nests: BTreeMap::new(),
2531        root,
2532    });
2533
2534    PartialResult {
2535        document,
2536        errors: context.errors,
2537        stopped_early: context.stopped,
2538    }
2539}
2540
2541/// Partial parsing version of `json_object_to_root`
2542fn partial_json_object_to_root(
2543    map: &Map<String, JsonValue>,
2544    config: &FromJsonConfig,
2545    structs: &mut BTreeMap<String, Vec<String>>,
2546    schema_cache: &mut SchemaCache,
2547    depth: usize,
2548    location: &ErrorLocation,
2549    context: &mut ErrorContext,
2550) -> Result<BTreeMap<String, Item>, JsonConversionError> {
2551    // Check object size limit
2552    if let Some(max_size) = config.max_object_size {
2553        if map.len() > max_size {
2554            let err = JsonConversionError::MaxObjectSizeExceeded(max_size, map.len());
2555            context.record_error(err.clone(), location.clone(), false);
2556            return Err(err);
2557        }
2558    }
2559
2560    let mut result = BTreeMap::new();
2561
2562    for (key, value) in map {
2563        if !context.should_continue() {
2564            break;
2565        }
2566
2567        // Skip metadata keys
2568        if key.starts_with("__") {
2569            continue;
2570        }
2571
2572        let item_location = location.child(key);
2573        match partial_json_value_to_item(
2574            value,
2575            key,
2576            config,
2577            structs,
2578            schema_cache,
2579            depth,
2580            &item_location,
2581            context,
2582        ) {
2583            Ok(item) => {
2584                result.insert(key.clone(), item);
2585            }
2586            Err(_) => {
2587                // Error already recorded in partial_json_value_to_item
2588                if context.config.replace_invalid_with_null {
2589                    result.insert(key.clone(), Item::Scalar(Value::Null));
2590                }
2591                // Otherwise skip this item
2592            }
2593        }
2594    }
2595
2596    Ok(result)
2597}
2598
2599/// Partial parsing version of `json_value_to_item`
2600#[allow(clippy::too_many_arguments)]
2601fn partial_json_value_to_item(
2602    value: &JsonValue,
2603    key: &str,
2604    config: &FromJsonConfig,
2605    structs: &mut BTreeMap<String, Vec<String>>,
2606    schema_cache: &mut SchemaCache,
2607    depth: usize,
2608    location: &ErrorLocation,
2609    context: &mut ErrorContext,
2610) -> Result<Item, JsonConversionError> {
2611    // Check recursion depth
2612    if let Some(max_depth) = config.max_depth {
2613        if depth >= max_depth {
2614            let err = JsonConversionError::MaxDepthExceeded(max_depth);
2615            context.record_error(err.clone(), location.clone(), false);
2616            return Err(err);
2617        }
2618    }
2619
2620    match value {
2621        JsonValue::Null => Ok(Item::Scalar(Value::Null)),
2622        JsonValue::Bool(b) => Ok(Item::Scalar(Value::Bool(*b))),
2623        JsonValue::Number(n) => match json_number_to_value(n) {
2624            Ok(value) => Ok(Item::Scalar(value)),
2625            Err(err) => {
2626                context.record_error(err.clone(), location.clone(), false);
2627                Err(err)
2628            }
2629        },
2630        JsonValue::String(s) => {
2631            // Check string length limit
2632            if let Some(max_len) = config.max_string_length {
2633                if s.len() > max_len {
2634                    let err = JsonConversionError::MaxStringLengthExceeded(max_len, s.len());
2635                    context.record_error(err.clone(), location.clone(), false);
2636                    return Err(err);
2637                }
2638            }
2639
2640            // Check for expression pattern $( ... )
2641            if s.starts_with("$(") && s.ends_with(')') {
2642                match parse_expression_token(s) {
2643                    Ok(expr) => Ok(Item::Scalar(Value::Expression(Box::new(expr)))),
2644                    Err(e) => {
2645                        let err = JsonConversionError::InvalidExpression(e.to_string());
2646                        context.record_error(err.clone(), location.clone(), false);
2647                        Err(err)
2648                    }
2649                }
2650            } else {
2651                Ok(Item::Scalar(Value::String(s.clone().into_boxed_str())))
2652            }
2653        }
2654        JsonValue::Array(arr) => {
2655            // Check array size limit
2656            if let Some(max_size) = config.max_array_size {
2657                if arr.len() > max_size {
2658                    let err = JsonConversionError::MaxArraySizeExceeded(max_size, arr.len());
2659                    context.record_error(err.clone(), location.clone(), false);
2660                    return Err(err);
2661                }
2662            }
2663
2664            // Handle empty arrays
2665            if arr.is_empty() {
2666                let type_name = singularize_and_capitalize(key);
2667                let schema: Vec<String> = DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect();
2668                let mut list = MatrixList::new(type_name.clone(), schema.clone());
2669                list.count_hint = Some(0);
2670                structs.insert(type_name, schema);
2671                Ok(Item::List(list))
2672            } else if is_tensor_array(arr) {
2673                match partial_json_array_to_tensor(arr, config, depth + 1, location, context) {
2674                    Ok(tensor) => Ok(Item::Scalar(Value::Tensor(Box::new(tensor)))),
2675                    Err(err) => Err(err),
2676                }
2677            } else if is_object_array(arr) {
2678                match partial_json_array_to_matrix_list(
2679                    arr,
2680                    key,
2681                    config,
2682                    structs,
2683                    schema_cache,
2684                    depth + 1,
2685                    location,
2686                    context,
2687                ) {
2688                    Ok(list) => Ok(Item::List(list)),
2689                    Err(err) => Err(err),
2690                }
2691            } else {
2692                // Mixed array - try to convert to tensor
2693                match partial_json_array_to_tensor(arr, config, depth + 1, location, context) {
2694                    Ok(tensor) => Ok(Item::Scalar(Value::Tensor(Box::new(tensor)))),
2695                    Err(err) => Err(err),
2696                }
2697            }
2698        }
2699        JsonValue::Object(obj) => {
2700            // Check for special keys
2701            if let Some(JsonValue::String(r)) = obj.get("@ref") {
2702                match parse_reference(r) {
2703                    Ok(reference) => Ok(Item::Scalar(Value::Reference(reference))),
2704                    Err(e) => {
2705                        let err = JsonConversionError::InvalidReference(e);
2706                        context.record_error(err.clone(), location.clone(), false);
2707                        Err(err)
2708                    }
2709                }
2710            } else {
2711                // Regular object
2712                match partial_json_object_to_item_map(
2713                    obj,
2714                    config,
2715                    structs,
2716                    schema_cache,
2717                    depth + 1,
2718                    location,
2719                    context,
2720                ) {
2721                    Ok(item_map) => Ok(Item::Object(item_map)),
2722                    Err(err) => Err(err),
2723                }
2724            }
2725        }
2726    }
2727}
2728
2729/// Partial parsing version of `json_object_to_item_map`
2730fn partial_json_object_to_item_map(
2731    map: &Map<String, JsonValue>,
2732    config: &FromJsonConfig,
2733    structs: &mut BTreeMap<String, Vec<String>>,
2734    schema_cache: &mut SchemaCache,
2735    depth: usize,
2736    location: &ErrorLocation,
2737    context: &mut ErrorContext,
2738) -> Result<BTreeMap<String, Item>, JsonConversionError> {
2739    // Check object size limit
2740    if let Some(max_size) = config.max_object_size {
2741        if map.len() > max_size {
2742            let err = JsonConversionError::MaxObjectSizeExceeded(max_size, map.len());
2743            context.record_error(err.clone(), location.clone(), false);
2744            return Err(err);
2745        }
2746    }
2747
2748    let mut result = BTreeMap::new();
2749
2750    for (key, value) in map {
2751        if !context.should_continue() {
2752            break;
2753        }
2754
2755        if key.starts_with("__") {
2756            continue;
2757        }
2758
2759        let item_location = location.child(key);
2760        match partial_json_value_to_item(
2761            value,
2762            key,
2763            config,
2764            structs,
2765            schema_cache,
2766            depth,
2767            &item_location,
2768            context,
2769        ) {
2770            Ok(item) => {
2771                result.insert(key.clone(), item);
2772            }
2773            Err(_) => {
2774                if context.config.replace_invalid_with_null {
2775                    result.insert(key.clone(), Item::Scalar(Value::Null));
2776                }
2777            }
2778        }
2779    }
2780
2781    Ok(result)
2782}
2783
2784/// Partial parsing version of `json_array_to_tensor`
2785fn partial_json_array_to_tensor(
2786    arr: &[JsonValue],
2787    config: &FromJsonConfig,
2788    depth: usize,
2789    location: &ErrorLocation,
2790    context: &mut ErrorContext,
2791) -> Result<Tensor, JsonConversionError> {
2792    // Check recursion depth
2793    if let Some(max_depth) = config.max_depth {
2794        if depth >= max_depth {
2795            let err = JsonConversionError::MaxDepthExceeded(max_depth);
2796            context.record_error(err.clone(), location.clone(), false);
2797            return Err(err);
2798        }
2799    }
2800
2801    let mut items = Vec::with_capacity(arr.len());
2802
2803    for (idx, v) in arr.iter().enumerate() {
2804        if !context.should_continue() {
2805            break;
2806        }
2807
2808        let elem_location = location.index(idx);
2809        let tensor = match v {
2810            JsonValue::Number(n) => {
2811                // Tensors use f64, but we should detect integer overflow
2812                // Note: For tensors, overflow to float is acceptable but we still check
2813                if is_integer_overflow(n) {
2814                    // For tensors, overflow to float is acceptable but worth noting
2815                    // in future versions, could add a warning mechanism
2816                }
2817
2818                if let Some(f) = n.as_f64() {
2819                    Ok(Tensor::Scalar(f))
2820                } else {
2821                    let err = JsonConversionError::InvalidNumber(n.to_string());
2822                    context.record_error(err.clone(), elem_location, false);
2823                    Err(err)
2824                }
2825            }
2826            JsonValue::Array(nested) => {
2827                partial_json_array_to_tensor(nested, config, depth + 1, &elem_location, context)
2828            }
2829            _ => {
2830                let err = JsonConversionError::InvalidTensor;
2831                context.record_error(err.clone(), elem_location, false);
2832                Err(err)
2833            }
2834        };
2835
2836        match tensor {
2837            Ok(t) => items.push(t),
2838            Err(_) => {
2839                if context.config.replace_invalid_with_null {
2840                    items.push(Tensor::Scalar(0.0));
2841                }
2842                // Otherwise skip this item
2843            }
2844        }
2845    }
2846
2847    Ok(Tensor::Array(items))
2848}
2849
/// Partial parsing version of `json_array_to_matrix_list`
///
/// Converts a JSON array of objects into a [`MatrixList`], inferring the
/// column schema from the first object (or an explicit `__hedl_schema`
/// override) and registering it in `structs`. Invalid rows are recorded in
/// `context` and skipped rather than aborting the whole array.
#[allow(clippy::too_many_arguments)]
fn partial_json_array_to_matrix_list(
    arr: &[JsonValue],
    key: &str,
    config: &FromJsonConfig,
    structs: &mut BTreeMap<String, Vec<String>>,
    schema_cache: &mut SchemaCache,
    depth: usize,
    location: &ErrorLocation,
    context: &mut ErrorContext,
) -> Result<MatrixList, JsonConversionError> {
    // Check recursion depth
    if let Some(max_depth) = config.max_depth {
        if depth >= max_depth {
            let err = JsonConversionError::MaxDepthExceeded(max_depth);
            context.record_error(err.clone(), location.clone(), false);
            return Err(err);
        }
    }

    // Entity type name is derived from the (typically plural) JSON key.
    let type_name = singularize_and_capitalize(key);

    // Infer schema from first object
    let schema: Vec<String> = if let Some(JsonValue::Object(first)) = arr.first() {
        // An explicit `__hedl_schema` string array overrides inference;
        // non-string entries are silently dropped.
        if let Some(JsonValue::Array(schema_arr)) = first.get("__hedl_schema") {
            schema_arr
                .iter()
                .filter_map(|v| v.as_str().map(String::from))
                .collect()
        } else {
            // Build a cache key from the column-bearing keys of the first
            // object: `__` metadata keys and object-array keys (which become
            // child nodes, not columns) are excluded. Sorted so key order in
            // the JSON doesn't produce distinct cache entries.
            let mut cache_key: Vec<String> = first
                .keys()
                .filter(|k| {
                    if k.starts_with("__") {
                        return false;
                    }
                    if let Some(JsonValue::Array(arr)) = first.get(*k) {
                        !is_object_array(arr)
                    } else {
                        true
                    }
                })
                .cloned()
                .collect();
            cache_key.sort();

            if let Some(cached_schema) = schema_cache.get(&cache_key) {
                cached_schema.clone()
            } else {
                // First time this key set is seen: promote `id` (if present)
                // to the front so it serves as the row identifier column,
                // then memoize the resulting column order.
                let mut keys = cache_key.clone();
                if let Some(pos) = keys.iter().position(|k| k == "id") {
                    keys.remove(pos);
                    keys.insert(0, "id".to_string());
                }
                schema_cache.insert(cache_key, keys.clone());
                keys
            }
        }
    } else {
        // Empty array, or first element is not an object: use the defaults.
        DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
    };

    // Guard against an empty inferred schema (e.g. an object with only
    // metadata keys); the `schema[0]` id lookup below relies on this.
    let schema = if schema.is_empty() {
        DEFAULT_SCHEMA.iter().map(|s| (*s).to_string()).collect()
    } else {
        schema
    };

    structs.insert(type_name.clone(), schema.clone());

    let mut rows = Vec::with_capacity(arr.len());

    for (idx, item) in arr.iter().enumerate() {
        // Stop once the error budget is exhausted.
        if !context.should_continue() {
            break;
        }

        let row_location = location.index(idx);

        if let JsonValue::Object(obj) = item {
            // The first schema column doubles as the node id; a missing or
            // non-string value collapses to an empty string.
            let id = obj
                .get(&schema[0])
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();

            // Convert each schema column; missing or invalid values become null.
            let mut fields = Vec::with_capacity(schema.len());
            for col in &schema {
                match obj.get(col) {
                    Some(v) => {
                        match partial_json_to_value(v, config, &row_location.child(col), context) {
                            Ok(value) => fields.push(value),
                            Err(_) => {
                                // Replace invalid values with null in partial mode
                                fields.push(Value::Null);
                            }
                        }
                    }
                    None => fields.push(Value::Null),
                }
            }

            // Handle nested children
            let mut children: BTreeMap<String, Vec<Node>> = BTreeMap::new();
            for (child_key, child_value) in obj {
                if !context.should_continue() {
                    break;
                }

                // Arrays of objects become nested child node lists keyed by
                // their JSON field name.
                if let JsonValue::Array(child_arr) = child_value {
                    if is_object_array(child_arr) {
                        let child_location = row_location.child(child_key);
                        if let Ok(child_list) = partial_json_array_to_matrix_list(
                            child_arr,
                            child_key,
                            config,
                            structs,
                            schema_cache,
                            depth + 1,
                            &child_location,
                            context,
                        ) {
                            children.insert(child_key.clone(), child_list.rows);
                        } else {
                            // Error already recorded, skip this child
                        }
                    }
                }
            }

            let node = Node {
                type_name: type_name.clone(),
                id,
                fields: fields.into(),
                children: if children.is_empty() {
                    None
                } else {
                    Some(Box::new(children))
                },
                child_count: 0,
            };

            rows.push(node);
        } else {
            // Invalid item in array - record error
            let err = JsonConversionError::InvalidRoot("Expected object in array".to_string());
            context.record_error(err, row_location, false);

            // Skip this item based on tolerance
            if context.config.tolerance == ErrorTolerance::SkipInvalidItems {
                continue;
            }
        }
    }

    // Count only the rows actually produced — may be fewer than arr.len()
    // when invalid items were skipped or conversion stopped early.
    let count_hint = Some(rows.len());

    Ok(MatrixList {
        type_name,
        schema,
        rows,
        count_hint,
    })
}
3015
3016/// Partial parsing version of `json_to_value`
3017fn partial_json_to_value(
3018    value: &JsonValue,
3019    config: &FromJsonConfig,
3020    location: &ErrorLocation,
3021    context: &mut ErrorContext,
3022) -> Result<Value, JsonConversionError> {
3023    match value {
3024        JsonValue::Null => Ok(Value::Null),
3025        JsonValue::Bool(b) => Ok(Value::Bool(*b)),
3026        JsonValue::Number(n) => match json_number_to_value(n) {
3027            Ok(value) => Ok(value),
3028            Err(err) => {
3029                context.record_error(err.clone(), location.clone(), false);
3030                Err(err)
3031            }
3032        },
3033        JsonValue::String(s) => {
3034            // Check string length limit
3035            if let Some(max_len) = config.max_string_length {
3036                if s.len() > max_len {
3037                    let err = JsonConversionError::MaxStringLengthExceeded(max_len, s.len());
3038                    context.record_error(err.clone(), location.clone(), false);
3039                    return Err(err);
3040                }
3041            }
3042
3043            // Check for expression pattern
3044            if s.starts_with("$(") && s.ends_with(')') {
3045                match parse_expression_token(s) {
3046                    Ok(expr) => Ok(Value::Expression(Box::new(expr))),
3047                    Err(e) => {
3048                        let err = JsonConversionError::InvalidExpression(e.to_string());
3049                        context.record_error(err.clone(), location.clone(), false);
3050                        Err(err)
3051                    }
3052                }
3053            } else {
3054                Ok(Value::String(s.clone().into_boxed_str()))
3055            }
3056        }
3057        JsonValue::Array(arr) => {
3058            // Check array size limit
3059            if let Some(max_size) = config.max_array_size {
3060                if arr.len() > max_size {
3061                    let err = JsonConversionError::MaxArraySizeExceeded(max_size, arr.len());
3062                    context.record_error(err.clone(), location.clone(), false);
3063                    return Err(err);
3064                }
3065            }
3066
3067            if is_object_array(arr) {
3068                Ok(Value::Null) // Children processed separately
3069            } else if is_tensor_array(arr) {
3070                match partial_json_array_to_tensor(arr, config, 0, location, context) {
3071                    Ok(tensor) => Ok(Value::Tensor(Box::new(tensor))),
3072                    Err(err) => Err(err),
3073                }
3074            } else if arr.is_empty() {
3075                Ok(Value::Tensor(Box::new(Tensor::Array(vec![]))))
3076            } else {
3077                match partial_json_array_to_tensor(arr, config, 0, location, context) {
3078                    Ok(tensor) => Ok(Value::Tensor(Box::new(tensor))),
3079                    Err(err) => Err(err),
3080                }
3081            }
3082        }
3083        JsonValue::Object(obj) => {
3084            if let Some(JsonValue::String(r)) = obj.get("@ref") {
3085                match parse_reference(r) {
3086                    Ok(reference) => Ok(Value::Reference(reference)),
3087                    Err(e) => {
3088                        let err = JsonConversionError::InvalidReference(e);
3089                        context.record_error(err.clone(), location.clone(), false);
3090                        Err(err)
3091                    }
3092                }
3093            } else {
3094                let err = JsonConversionError::NestedObject;
3095                context.record_error(err.clone(), location.clone(), false);
3096                Err(err)
3097            }
3098        }
3099    }
3100}